[llvm] [RISCV] Add short forward branch support for `lb`, `lbu`, `lh`, `lhu`, `lw`, `lwu` and `ld` (PR #170829)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 9 20:21:11 PST 2025
https://github.com/hchandel updated https://github.com/llvm/llvm-project/pull/170829
From 842d44e2876d2f47cc618c2bf1d0db70a5c71885 Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Tue, 18 Nov 2025 16:35:10 +0530
Subject: [PATCH 01/11] [RISCV] Add short forward branch support for lb, lh,
lhu, lbu, and lw
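
This makes a select whose chosen value is produced by one of these loads
eligible for the short forward branch transform, gated by the new
short-forward-branch-i-load tune feature (see the RUN lines of the new
test). A minimal IR sketch of the targeted pattern, with illustrative
names only:

  define i32 @sketch(ptr %p, i1 %c, i32 %b) {
  entry:
    %v = load i32, ptr %p
    %r = select i1 %c, i32 %v, i32 %b
    ret i32 %r
  }

With the feature enabled, the select and the load are emitted as a short
forward branch over the load, roughly:

  beqz a1, 1f        # inverted condition skips the load
  lw   a2, 0(a0)     # runs only when the select picks the loaded value
1:
  mv   a0, a2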
Change-Id: Id95f1887590cccce6e0884703e9c46ca08864efa
---
.../Target/RISCV/RISCVExpandPseudoInsts.cpp | 10 +
llvm/lib/Target/RISCV/RISCVFeatures.td | 6 +
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 50 +-
llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 16 +
llvm/test/CodeGen/RISCV/features-info.ll | 1 +
.../RISCV/short-forward-branch-opt-load.ll | 755 ++++++++++++++++++
6 files changed, 835 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 60e0afdd99912..04394c37b6bf6 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -133,6 +133,11 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCMINU:
case RISCV::PseudoCCMUL:
case RISCV::PseudoCCLUI:
+ case RISCV::PseudoCCLB:
+ case RISCV::PseudoCCLH:
+ case RISCV::PseudoCCLW:
+ case RISCV::PseudoCCLHU:
+ case RISCV::PseudoCCLBU:
case RISCV::PseudoCCQC_LI:
case RISCV::PseudoCCQC_E_LI:
case RISCV::PseudoCCADDW:
@@ -243,6 +248,11 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break;
case RISCV::PseudoCCMUL: NewOpc = RISCV::MUL; break;
case RISCV::PseudoCCLUI: NewOpc = RISCV::LUI; break;
+ case RISCV::PseudoCCLB: NewOpc = RISCV::LB; break;
+ case RISCV::PseudoCCLH: NewOpc = RISCV::LH; break;
+ case RISCV::PseudoCCLW: NewOpc = RISCV::LW; break;
+ case RISCV::PseudoCCLHU: NewOpc = RISCV::LHU; break;
+ case RISCV::PseudoCCLBU: NewOpc = RISCV::LBU; break;
case RISCV::PseudoCCQC_LI: NewOpc = RISCV::QC_LI; break;
case RISCV::PseudoCCQC_E_LI: NewOpc = RISCV::QC_E_LI; break;
case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 0b964c4808d8a..7b21f6e1cefe0 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1864,6 +1864,12 @@ def TuneShortForwardBranchIMul
"true", "Enable short forward branch optimization for mul instruction",
[TuneShortForwardBranchOpt]>;
+
+def TuneShortForwardBranchILoad
+ : SubtargetFeature<"short-forward-branch-i-load", "HasShortForwardBranchILoad",
+ "true", "Enable short forward branch optimization for load instructions",
+ [TuneShortForwardBranchOpt]>;
+
// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
def TuneNoSinkSplatOperands
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index fb914e97e2229..d6953c24a8955 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1708,6 +1708,11 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
case RISCV::MINU: return RISCV::PseudoCCMINU;
case RISCV::MUL: return RISCV::PseudoCCMUL;
case RISCV::LUI: return RISCV::PseudoCCLUI;
+ case RISCV::LB: return RISCV::PseudoCCLB;
+ case RISCV::LBU: return RISCV::PseudoCCLBU;
+ case RISCV::LH: return RISCV::PseudoCCLH;
+ case RISCV::LHU: return RISCV::PseudoCCLHU;
+ case RISCV::LW: return RISCV::PseudoCCLW;
case RISCV::QC_LI: return RISCV::PseudoCCQC_LI;
case RISCV::QC_E_LI: return RISCV::PseudoCCQC_E_LI;
@@ -1747,7 +1752,8 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII,
- const RISCVSubtarget &STI) {
+ const RISCVSubtarget &STI,
+ const MachineInstr *UseMI) {
if (!Reg.isVirtual())
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
@@ -1761,6 +1767,12 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
return nullptr;
+ if (!STI.hasShortForwardBranchILoad() &&
+ (MI->getOpcode() == RISCV::LB || MI->getOpcode() == RISCV::LBU ||
+ MI->getOpcode() == RISCV::LW || MI->getOpcode() == RISCV::LH ||
+ MI->getOpcode() == RISCV::LHU))
+ return nullptr;
+
if (!STI.hasShortForwardBranchIMul() && MI->getOpcode() == RISCV::MUL)
return nullptr;
@@ -1788,6 +1800,37 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
return nullptr;
}
bool DontMoveAcrossStores = true;
+
+ if (MI->getOpcode() == RISCV::LB || MI->getOpcode() == RISCV::LBU ||
+ MI->getOpcode() == RISCV::LW || MI->getOpcode() == RISCV::LH ||
+ MI->getOpcode() == RISCV::LHU) {
+ if (MI && UseMI && MI->getParent() == UseMI->getParent()) {
+ // For the simple case, when both the def and the use of the load are in
+ // the same basic block, the instructions can be scanned linearly to check
+ // whether there are any stores between the def and the use.
+ auto &MBB = *MI->getParent();
+ DontMoveAcrossStores = false;
+
+ auto DefIt = MBB.begin();
+ auto UseIt = MBB.begin();
+
+ for (auto It = MBB.begin(); It != MBB.end(); ++It) {
+ if (&*It == MI)
+ DefIt = It;
+ if (&*It == UseMI)
+ UseIt = It;
+ }
+ if (DefIt != MBB.end() && UseIt != MBB.end() && DefIt != UseIt) {
+ for (auto I = std::next(DefIt); I != UseIt; ++I) {
+ if (I->mayStore()) {
+ DontMoveAcrossStores = true;
+ LLVM_DEBUG(dbgs() << "Store found between def and use\n");
+ }
+ }
+ }
+ }
+ }
+
if (!MI->isSafeToMove(DontMoveAcrossStores))
return nullptr;
return MI;
@@ -1827,10 +1870,11 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
MachineInstr *DefMI =
- canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this, STI);
+ canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this, STI, &MI);
bool Invert = !DefMI;
if (!DefMI)
- DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, STI);
+ DefMI =
+ canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, STI, &MI);
if (!DefMI)
return nullptr;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index 5b1c13493bbf2..e7fca38cf5dbe 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -80,6 +80,17 @@ class SFBLUI
let Constraints = "$dst = $falsev";
}
+class SFBLoad
+ : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1,
+ simm12_lo:$imm), []> {
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+ let Size = 8;
+ let Constraints = "$dst = $falsev";
+}
+
class SFBShift_ri
: Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1,
@@ -122,6 +133,11 @@ def PseudoCCMIN : SFBALU_rr;
def PseudoCCMAXU : SFBALU_rr;
def PseudoCCMINU : SFBALU_rr;
def PseudoCCMUL : SFBALU_rr;
+def PseudoCCLB : SFBLoad;
+def PseudoCCLH : SFBLoad;
+def PseudoCCLW : SFBLoad;
+def PseudoCCLHU : SFBLoad;
+def PseudoCCLBU : SFBLoad;
def PseudoCCADDI : SFBALU_ri;
def PseudoCCANDI : SFBALU_ri;
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 3d9906fdcbeb3..9e6be44272821 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -137,6 +137,7 @@
; CHECK-NEXT: shgatpa - 'Shgatpa' (SvNNx4 mode supported for all modes supported by satp, as well as Bare).
; CHECK-NEXT: shifted-zextw-fusion - Enable SLLI+SRLI to be fused when computing (shifted) word zero extension.
; CHECK-NEXT: shlcofideleg - 'Shlcofideleg' (Delegating LCOFI Interrupts to VS-mode).
+; CHECK-NEXT: short-forward-branch-i-load - Enable short forward branch optimization for load instructions.
; CHECK-NEXT: short-forward-branch-i-minmax - Enable short forward branch optimization for min,max instructions in Zbb.
; CHECK-NEXT: short-forward-branch-i-mul - Enable short forward branch optimization for mul instruction.
; CHECK-NEXT: short-forward-branch-opt - Enable short forward branch optimization.
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
new file mode 100644
index 0000000000000..4f0a0861721f6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
@@ -0,0 +1,755 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-i-load | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-i-load | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
+
+define i32 @test_i8_s(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i8_s:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB0_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lb a2, 4(a0)
+; RV32I-NEXT: .LBB0_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB0_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lb a2, 4(a0)
+; RV64I-NEXT: .LBB0_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB0_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB0_2: # %entry
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB0_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB0_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lb a2, 4(a0)
+; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i8_z:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lbu a2, 4(a0)
+; RV32I-NEXT: .LBB1_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB1_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lbu a2, 4(a0)
+; RV64I-NEXT: .LBB1_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB1_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB1_2: # %entry
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB1_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB1_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i16_s:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB2_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lh a2, 8(a0)
+; RV32I-NEXT: .LBB2_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB2_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lh a2, 8(a0)
+; RV64I-NEXT: .LBB2_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB2_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB2_2: # %entry
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB2_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB2_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lh a2, 8(a0)
+; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i16_z:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB3_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lhu a2, 8(a0)
+; RV32I-NEXT: .LBB3_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB3_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lhu a2, 8(a0)
+; RV64I-NEXT: .LBB3_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB3_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB3_2: # %entry
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB3_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB3_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i32:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB4_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lw a2, 16(a0)
+; RV32I-NEXT: .LBB4_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB4_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lw a2, 16(a0)
+; RV64I-NEXT: .LBB4_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB4_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB4_2: # %entry
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB4_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB4_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB4_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw a2, 16(a0)
+; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB4_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lw a2, 16(a0)
+; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 16
+ %val = load i32, ptr %addr ; load 32-bit value
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
+define i64 @test_i8_s_1(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i8_s_1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB5_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lb a2, 4(a0)
+; RV32I-NEXT: srai a3, a2, 31
+; RV32I-NEXT: .LBB5_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB5_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lb a2, 4(a0)
+; RV64I-NEXT: .LBB5_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB5_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB5_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB5_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB5_4: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB5_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB5_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB5_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB5_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB5_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_z_1(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i8_z_1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB6_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lbu a2, 4(a0)
+; RV32I-NEXT: .LBB6_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB6_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lbu a2, 4(a0)
+; RV64I-NEXT: .LBB6_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB6_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB6_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB6_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB6_4: # %entry
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB6_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a4, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a4, .LBB6_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: beqz a4, .LBB6_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: .LBB6_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB6_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i16_s_1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB7_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lh a2, 8(a0)
+; RV32I-NEXT: srai a3, a2, 31
+; RV32I-NEXT: .LBB7_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB7_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lh a2, 8(a0)
+; RV64I-NEXT: .LBB7_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB7_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB7_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB7_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB7_4: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB7_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB7_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB7_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB7_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB7_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 8
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_1(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i16_z_1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB8_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lhu a2, 8(a0)
+; RV32I-NEXT: .LBB8_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB8_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lhu a2, 8(a0)
+; RV64I-NEXT: .LBB8_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB8_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB8_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB8_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB8_4: # %entry
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB8_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB8_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a4, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a4, .LBB8_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: beqz a4, .LBB8_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: .LBB8_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB8_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 8
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i64_1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lw a2, 32(a0)
+; RV32I-NEXT: lw a3, 36(a0)
+; RV32I-NEXT: .LBB9_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB9_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: ld a2, 32(a0)
+; RV64I-NEXT: .LBB9_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a4, 32(a0)
+; RV32I-SFB-NEXT: lw a5, 36(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB9_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a4, a2
+; RV32I-SFB-NEXT: .LBB9_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB9_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a5, a3
+; RV32I-SFB-NEXT: .LBB9_4: # %entry
+; RV32I-SFB-NEXT: mv a0, a4
+; RV32I-SFB-NEXT: mv a1, a5
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB9_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB9_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB9_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw a2, 32(a0)
+; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB9_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: lw a3, 36(a0)
+; RV32I-SFBILOAD-NEXT: .LBB9_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 32
+ %val = load i64, ptr %addr ; load 64-bit value
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
From 713b119a46ae98103eb9f369e95003939bf4593e Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Fri, 21 Nov 2025 16:48:09 +0530
Subject: [PATCH 02/11] fixup! Reuse the existing support for load folding
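
With this change the fold is done by the target's foldMemoryOperandImpl
hook instead of load-specific cases in canFoldAsPredicatedOp. Roughly, as
a MIR-like sketch with illustrative virtual register names, the pair

  %v = LW %base, 16
  %r = PseudoCCMOVGPR %lhs, %rhs, %cc, %false, %v

folds into a single predicated load pseudo

  %r = PseudoCCLW %lhs, %rhs, %cc, %false, %base, 16

with the condition code inverted when the load feeds the false operand
instead of the true one.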
Change-Id: Iba30a2d81f79b0b99bf718252a8ad4c4e331c03c
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 108 ++++++++++++++---------
llvm/lib/Target/RISCV/RISCVInstrInfo.h | 5 ++
2 files changed, 71 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index d6953c24a8955..3a3e6f39c9df4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -888,6 +888,72 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
.addImm(0);
}
+unsigned getLoadPredicatedOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case RISCV::LB:
+ return RISCV::PseudoCCLB;
+ case RISCV::LBU:
+ return RISCV::PseudoCCLBU;
+ case RISCV::LH:
+ return RISCV::PseudoCCLH;
+ case RISCV::LHU:
+ return RISCV::PseudoCCLHU;
+ case RISCV::LW:
+ return RISCV::PseudoCCLW;
+ default:
+ return 0;
+ }
+}
+
+MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+ LiveIntervals *LIS) const {
+ assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
+ "Unknown select instruction");
+ if (!STI.hasShortForwardBranchILoad() ||
+ (LoadMI.getOpcode() != RISCV::LB && LoadMI.getOpcode() != RISCV::LBU &&
+ LoadMI.getOpcode() != RISCV::LH && LoadMI.getOpcode() != RISCV::LHU &&
+ LoadMI.getOpcode() != RISCV::LW))
+ return nullptr;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool Invert =
+ MRI.getVRegDef(MI.getOperand(4).getReg()) == &LoadMI;
+ MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
+ Register DestReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
+ if (!MRI.constrainRegClass(DestReg, PreviousClass))
+ return nullptr;
+
+ unsigned PredOpc = getLoadPredicatedOpcode(LoadMI.getOpcode());
+ assert(PredOpc != 0 && "Unexpected opcode!");
+
+ // Create a new predicated version of LoadMI.
+ MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), InsertPt,
+ MI.getDebugLoc(), get(PredOpc), DestReg);
+
+ // Copy the condition portion.
+ NewMI.add(MI.getOperand(1));
+ NewMI.add(MI.getOperand(2));
+
+ // Add condition code, inverting if necessary.
+ auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
+ if (Invert)
+ CC = RISCVCC::getInverseBranchCondition(CC);
+ NewMI.addImm(CC);
+
+ // Copy the false register.
+ NewMI.add(FalseReg);
+
+ // Copy the remaining LoadMI operands (base register and offset).
+ const MCInstrDesc &DefDesc = LoadMI.getDesc();
+ for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
+ NewMI.add(LoadMI.getOperand(i));
+
+ return NewMI;
+}
+
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
@@ -1708,11 +1774,6 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
case RISCV::MINU: return RISCV::PseudoCCMINU;
case RISCV::MUL: return RISCV::PseudoCCMUL;
case RISCV::LUI: return RISCV::PseudoCCLUI;
- case RISCV::LB: return RISCV::PseudoCCLB;
- case RISCV::LBU: return RISCV::PseudoCCLBU;
- case RISCV::LH: return RISCV::PseudoCCLH;
- case RISCV::LHU: return RISCV::PseudoCCLHU;
- case RISCV::LW: return RISCV::PseudoCCLW;
case RISCV::QC_LI: return RISCV::PseudoCCQC_LI;
case RISCV::QC_E_LI: return RISCV::PseudoCCQC_E_LI;
@@ -1767,12 +1828,6 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
return nullptr;
- if (!STI.hasShortForwardBranchILoad() &&
- (MI->getOpcode() == RISCV::LB || MI->getOpcode() == RISCV::LBU ||
- MI->getOpcode() == RISCV::LW || MI->getOpcode() == RISCV::LH ||
- MI->getOpcode() == RISCV::LHU))
- return nullptr;
-
if (!STI.hasShortForwardBranchIMul() && MI->getOpcode() == RISCV::MUL)
return nullptr;
@@ -1800,37 +1855,6 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
return nullptr;
}
bool DontMoveAcrossStores = true;
-
- if (MI->getOpcode() == RISCV::LB || MI->getOpcode() == RISCV::LBU ||
- MI->getOpcode() == RISCV::LW || MI->getOpcode() == RISCV::LH ||
- MI->getOpcode() == RISCV::LHU) {
- if (MI && UseMI && MI->getParent() == UseMI->getParent()) {
- // For the simple case, when both the def and the use of the load are in
- // the same basic block, the instructions can be scanned linearly to check
- // whether there are any stores between the def and the use.
- auto &MBB = *MI->getParent();
- DontMoveAcrossStores = false;
-
- auto DefIt = MBB.begin();
- auto UseIt = MBB.begin();
-
- for (auto It = MBB.begin(); It != MBB.end(); ++It) {
- if (&*It == MI)
- DefIt = It;
- if (&*It == UseMI)
- UseIt = It;
- }
- if (DefIt != MBB.end() && UseIt != MBB.end() && DefIt != UseIt) {
- for (auto I = std::next(DefIt); I != UseIt; ++I) {
- if (I->mayStore()) {
- DontMoveAcrossStores = true;
- LLVM_DEBUG(dbgs() << "Store found between def and use\n");
- }
- }
- }
- }
- }
-
if (!MI->isSafeToMove(DontMoveAcrossStores))
return nullptr;
return MI;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 0ffe015b9fac8..908da393535fe 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -133,6 +133,11 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
LiveIntervals *LIS = nullptr,
VirtRegMap *VRM = nullptr) const override;
+ MachineInstr *foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+ LiveIntervals *LIS = nullptr) const override;
+
// Materializes the given integer Val into DstReg.
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
From 43e941eacd5b38982a1e48715b3db388b8770c91 Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Fri, 21 Nov 2025 16:52:09 +0530
Subject: [PATCH 03/11] fixup! Remove redundant parameters
Change-Id: Ib3259359f6c1cd5dc0e81503c9895393a48cef88
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 3a3e6f39c9df4..0bd42a2633888 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1813,8 +1813,7 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII,
- const RISCVSubtarget &STI,
- const MachineInstr *UseMI) {
+ const RISCVSubtarget &STI) {
if (!Reg.isVirtual())
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
@@ -1894,11 +1893,10 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
MachineInstr *DefMI =
- canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this, STI, &MI);
+ canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this, STI);
bool Invert = !DefMI;
if (!DefMI)
- DefMI =
- canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, STI, &MI);
+ DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, STI);
if (!DefMI)
return nullptr;
From ac5c60117b6bf811fecbcba10eea9928d57a4be2 Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Tue, 25 Nov 2025 18:08:29 +0530
Subject: [PATCH 04/11] fixup! Remove assert and add test cases
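
The assert becomes a bail-out so that select pseudos other than
PseudoCCMOVGPR simply skip the fold. The added tests also cover loads
that must stay unconditional; for example (IR sketch, names taken from
the new tests), a store between the load and the select may alias the
loaded address, so the load is not folded under the branch:

  %val = load i32, ptr %addr
  store i32 %c, ptr %base1   ; may alias %addr
  %res = select i1 %x, i32 %val, i32 %b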
Change-Id: I38375cba5c5897a2f3e0a3e8ba7909865e466f74
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 6 +-
.../RISCV/short-forward-branch-opt-load.ll | 1162 ++++++++++++++---
2 files changed, 1003 insertions(+), 165 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 0bd42a2633888..f5c65410a3eba 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -909,8 +909,10 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
LiveIntervals *LIS) const {
- assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
- "Unknown select instruction");
+ // For now, only handle RISCV::PseudoCCMOVGPR.
+ if (MI.getOpcode() != RISCV::PseudoCCMOVGPR)
+ return nullptr;
+
if (!STI.hasShortForwardBranchILoad() ||
(LoadMI.getOpcode() != RISCV::LB && LoadMI.getOpcode() != RISCV::LBU &&
LoadMI.getOpcode() != RISCV::LH && LoadMI.getOpcode() != RISCV::LHU &&
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
index 4f0a0861721f6..9ed1218cf7fb5 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
@@ -349,407 +349,1243 @@ entry:
ret i32 %res
}
+define i32 @test_i8_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i8_s_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB5_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB5_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB5_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB5_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB5_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB5_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB5_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB5_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB5_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i8_z_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB6_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB6_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB6_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB6_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB6_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB6_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB6_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB6_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i16_s_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB7_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB7_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB7_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB7_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB7_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB7_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB7_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB7_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB7_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i16_z_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB8_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB8_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB8_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB8_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB8_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB8_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB8_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB8_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB8_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i32_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB9_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB9_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB9_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB9_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_store:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB9_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB9_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_store:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB9_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB9_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_store:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_store:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 16
+ %val = load i32, ptr %addr ; load 32-bit value
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
define i64 @test_i8_s_1(ptr %base, i1 %x, i64 %b) {
; RV32I-LABEL: test_i8_s_1:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: beqz a1, .LBB5_2
+; RV32I-NEXT: beqz a1, .LBB10_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lb a2, 4(a0)
+; RV32I-NEXT: srai a3, a2, 31
+; RV32I-NEXT: .LBB10_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB10_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lb a2, 4(a0)
+; RV64I-NEXT: .LBB10_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB10_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB10_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB10_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB10_4: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB10_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB10_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB10_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_z_1(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i8_z_1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lbu a2, 4(a0)
+; RV32I-NEXT: .LBB11_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB11_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lbu a2, 4(a0)
+; RV64I-NEXT: .LBB11_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB11_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB11_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB11_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB11_4: # %entry
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB11_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB11_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a4, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a4, .LBB11_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: beqz a4, .LBB11_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB11_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i16_s_1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB12_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lh a2, 8(a0)
+; RV32I-NEXT: srai a3, a2, 31
+; RV32I-NEXT: .LBB12_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB12_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lh a2, 8(a0)
+; RV64I-NEXT: .LBB12_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB12_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB12_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB12_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB12_4: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB12_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB12_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB12_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4   ; compute base + 4 elements (byte offset 8)
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_1(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i16_z_1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB13_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lhu a2, 8(a0)
+; RV32I-NEXT: .LBB13_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB13_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lhu a2, 8(a0)
+; RV64I-NEXT: .LBB13_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB13_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB13_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB13_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB13_4: # %entry
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB13_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB13_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a4, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a4, .LBB13_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: beqz a4, .LBB13_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB13_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4   ; compute base + 4 elements (byte offset 8)
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i64_1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB14_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lb a2, 4(a0)
-; RV32I-NEXT: srai a3, a2, 31
-; RV32I-NEXT: .LBB5_2: # %entry
+; RV32I-NEXT: lw a2, 32(a0)
+; RV32I-NEXT: lw a3, 36(a0)
+; RV32I-NEXT: .LBB14_2: # %entry
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
-; RV64I-LABEL: test_i8_s_1:
+; RV64I-LABEL: test_i64_1:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: beqz a1, .LBB5_2
+; RV64I-NEXT: beqz a1, .LBB14_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: lb a2, 4(a0)
-; RV64I-NEXT: .LBB5_2: # %entry
+; RV64I-NEXT: ld a2, 32(a0)
+; RV64I-NEXT: .LBB14_2: # %entry
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
-; RV32I-SFB-LABEL: test_i8_s_1:
+; RV32I-SFB-LABEL: test_i64_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a4, 32(a0)
+; RV32I-SFB-NEXT: lw a5, 36(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB14_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a4, a2
+; RV32I-SFB-NEXT: .LBB14_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB14_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a5, a3
+; RV32I-SFB-NEXT: .LBB14_4: # %entry
+; RV32I-SFB-NEXT: mv a0, a4
+; RV32I-SFB-NEXT: mv a1, a5
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB14_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB14_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB14_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw a2, 32(a0)
+; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB14_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: lw a3, 36(a0)
+; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB14_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i64, ptr %base, i64 4   ; compute base + 4 elements (byte offset 32)
+ %val = load i64, ptr %addr ; load 64-bit value
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i8_s_store_64:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB15_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB15_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_64:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB15_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB15_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lb a0, 4(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB5_2
+; RV32I-SFB-NEXT: beqz a1, .LBB15_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB5_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB5_4
+; RV32I-SFB-NEXT: .LBB15_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB15_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB5_4: # %entry
+; RV32I-SFB-NEXT: .LBB15_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
-; RV64I-SFB-LABEL: test_i8_s_1:
+; RV64I-SFB-LABEL: test_i8_s_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lb a0, 4(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB5_2
+; RV64I-SFB-NEXT: bnez a1, .LBB15_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-NEXT: .LBB15_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
-; RV32I-SFBILOAD-LABEL: test_i8_s_1:
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_64:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB5_2
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB15_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB5_4
+; RV32I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB15_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB5_4: # %entry
+; RV32I-SFBILOAD-NEXT: .LBB15_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
-; RV64I-SFBILOAD-LABEL: test_i8_s_1:
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB5_2
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB15_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
-; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
%val = load i8, ptr %addr ; load 8-bit value
%ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
%res = select i1 %x, i64 %ext, i64 %b
ret i64 %res
}
-define i64 @test_i8_z_1(ptr %base, i1 %x, i64 %b) {
-; RV32I-LABEL: test_i8_z_1:
+define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i8_z_store_64:
; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a0, 4(a0)
; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: beqz a1, .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lbu a2, 4(a0)
-; RV32I-NEXT: .LBB6_2: # %entry
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB16_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB16_2: # %entry
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: ret
;
-; RV64I-LABEL: test_i8_z_1:
+; RV64I-LABEL: test_i8_z_store_64:
; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a0, 4(a0)
; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: beqz a1, .LBB6_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: lbu a2, 4(a0)
-; RV64I-NEXT: .LBB6_2: # %entry
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB16_2
+; RV64I-NEXT: # %bb.1: # %entry
; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB16_2: # %entry
; RV64I-NEXT: ret
;
-; RV32I-SFB-LABEL: test_i8_z_1:
+; RV32I-SFB-LABEL: test_i8_z_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lbu a0, 4(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB6_2
+; RV32I-SFB-NEXT: beqz a1, .LBB16_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB6_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB6_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB6_4: # %entry
+; RV32I-SFB-NEXT: .LBB16_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB16_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB16_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
-; RV64I-SFB-LABEL: test_i8_z_1:
+; RV64I-SFB-LABEL: test_i8_z_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lbu a0, 4(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB6_2
+; RV64I-SFB-NEXT: bnez a1, .LBB16_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-NEXT: .LBB16_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
-; RV32I-SFBILOAD-LABEL: test_i8_z_1:
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_64:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a4, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a4, .LBB6_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
-; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: beqz a4, .LBB6_4
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB16_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: .LBB6_4: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
-; RV64I-SFBILOAD-LABEL: test_i8_z_1:
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB6_2
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
-; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
%val = load i8, ptr %addr ; load 8-bit value
%ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
%res = select i1 %x, i64 %ext, i64 %b
ret i64 %res
}
-define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) {
-; RV32I-LABEL: test_i16_s_1:
+define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i16_s_store_64:
; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a0, 8(a0)
; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: beqz a1, .LBB7_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lh a2, 8(a0)
-; RV32I-NEXT: srai a3, a2, 31
-; RV32I-NEXT: .LBB7_2: # %entry
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB17_2
+; RV32I-NEXT: # %bb.1: # %entry
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB17_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
;
-; RV64I-LABEL: test_i16_s_1:
+; RV64I-LABEL: test_i16_s_store_64:
; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a0, 8(a0)
; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: beqz a1, .LBB7_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: lh a2, 8(a0)
-; RV64I-NEXT: .LBB7_2: # %entry
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB17_2
+; RV64I-NEXT: # %bb.1: # %entry
; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB17_2: # %entry
; RV64I-NEXT: ret
;
-; RV32I-SFB-LABEL: test_i16_s_1:
+; RV32I-SFB-LABEL: test_i16_s_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lh a0, 8(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB7_2
+; RV32I-SFB-NEXT: beqz a1, .LBB17_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB7_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB7_4
+; RV32I-SFB-NEXT: .LBB17_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB17_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB7_4: # %entry
+; RV32I-SFB-NEXT: .LBB17_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
-; RV64I-SFB-LABEL: test_i16_s_1:
+; RV64I-SFB-LABEL: test_i16_s_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lh a0, 8(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB7_2
+; RV64I-SFB-NEXT: bnez a1, .LBB17_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-NEXT: .LBB17_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
-; RV32I-SFBILOAD-LABEL: test_i16_s_1:
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_64:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB7_2
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB7_4
+; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB7_4: # %entry
+; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
-; RV64I-SFBILOAD-LABEL: test_i16_s_1:
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB7_2
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
-; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
   %addr = getelementptr i16, ptr %base, i64 4   ; compute base + 4 elements (byte offset 8)
%val = load i16, ptr %addr ; load 16-bit value
%ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
%res = select i1 %x, i64 %ext, i64 %b
ret i64 %res
}
-define i64 @test_i16_z_1(ptr %base, i1 %x, i64 %b) {
-; RV32I-LABEL: test_i16_z_1:
+define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i16_z_store_64:
; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a0, 8(a0)
; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: beqz a1, .LBB8_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lhu a2, 8(a0)
-; RV32I-NEXT: .LBB8_2: # %entry
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB18_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB18_2: # %entry
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: ret
;
-; RV64I-LABEL: test_i16_z_1:
+; RV64I-LABEL: test_i16_z_store_64:
; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a0, 8(a0)
; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: beqz a1, .LBB8_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: lhu a2, 8(a0)
-; RV64I-NEXT: .LBB8_2: # %entry
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB18_2
+; RV64I-NEXT: # %bb.1: # %entry
; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB18_2: # %entry
; RV64I-NEXT: ret
;
-; RV32I-SFB-LABEL: test_i16_z_1:
+; RV32I-SFB-LABEL: test_i16_z_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lhu a0, 8(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB8_2
+; RV32I-SFB-NEXT: beqz a1, .LBB18_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB8_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB8_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB8_4: # %entry
+; RV32I-SFB-NEXT: .LBB18_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB18_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB18_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
-; RV64I-SFB-LABEL: test_i16_z_1:
+; RV64I-SFB-LABEL: test_i16_z_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lhu a0, 8(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB8_2
+; RV64I-SFB-NEXT: bnez a1, .LBB18_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB8_2: # %entry
+; RV64I-SFB-NEXT: .LBB18_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
-; RV32I-SFBILOAD-LABEL: test_i16_z_1:
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_64:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a4, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a4, .LBB8_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
-; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: beqz a4, .LBB8_4
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB18_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: .LBB8_4: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
-; RV64I-SFBILOAD-LABEL: test_i16_z_1:
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB8_2
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
-; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
   %addr = getelementptr i16, ptr %base, i64 4   ; compute base + 4 elements (byte offset 8)
%val = load i16, ptr %addr ; load 16-bit value
%ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
%res = select i1 %x, i64 %ext, i64 %b
ret i64 %res
}
-define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) {
-; RV32I-LABEL: test_i64_1:
+define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i64_store_64:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: beqz a1, .LBB9_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lw a2, 32(a0)
-; RV32I-NEXT: lw a3, 36(a0)
-; RV32I-NEXT: .LBB9_2: # %entry
+; RV32I-NEXT: mv a7, a1
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: lw a0, 32(a0)
+; RV32I-NEXT: lw a1, 36(a1)
+; RV32I-NEXT: andi a7, a7, 1
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a7, .LBB19_2
+; RV32I-NEXT: # %bb.1: # %entry
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: .LBB19_2: # %entry
; RV32I-NEXT: ret
;
-; RV64I-LABEL: test_i64_1:
+; RV64I-LABEL: test_i64_store_64:
; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: ld a0, 32(a0)
; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: beqz a1, .LBB9_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: ld a2, 32(a0)
-; RV64I-NEXT: .LBB9_2: # %entry
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB19_2
+; RV64I-NEXT: # %bb.1: # %entry
; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB19_2: # %entry
; RV64I-NEXT: ret
;
-; RV32I-SFB-LABEL: test_i64_1:
+; RV32I-SFB-LABEL: test_i64_store_64:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lw a4, 32(a0)
-; RV32I-SFB-NEXT: lw a5, 36(a0)
+; RV32I-SFB-NEXT: lw a7, 32(a0)
+; RV32I-SFB-NEXT: lw t0, 36(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB9_2
+; RV32I-SFB-NEXT: bnez a1, .LBB19_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a4, a2
-; RV32I-SFB-NEXT: .LBB9_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB9_4
+; RV32I-SFB-NEXT: mv a7, a2
+; RV32I-SFB-NEXT: .LBB19_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB19_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a5, a3
-; RV32I-SFB-NEXT: .LBB9_4: # %entry
-; RV32I-SFB-NEXT: mv a0, a4
-; RV32I-SFB-NEXT: mv a1, a5
+; RV32I-SFB-NEXT: mv t0, a3
+; RV32I-SFB-NEXT: .LBB19_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a7
+; RV32I-SFB-NEXT: mv a1, t0
; RV32I-SFB-NEXT: ret
;
-; RV64I-SFB-LABEL: test_i64_1:
+; RV64I-SFB-LABEL: test_i64_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: ld a0, 32(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB9_2
+; RV64I-SFB-NEXT: bnez a1, .LBB19_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB9_2: # %entry
+; RV64I-SFB-NEXT: .LBB19_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
-; RV32I-SFBILOAD-LABEL: test_i64_1:
+; RV32I-SFBILOAD-LABEL: test_i64_store_64:
; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a7, 32(a0)
+; RV32I-SFBILOAD-NEXT: lw t0, 36(a0)
; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB9_2
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lw a2, 32(a0)
-; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB9_4
+; RV32I-SFBILOAD-NEXT: mv a7, a2
+; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: lw a3, 36(a0)
-; RV32I-SFBILOAD-NEXT: .LBB9_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: mv t0, a3
+; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a7
+; RV32I-SFBILOAD-NEXT: mv a1, t0
; RV32I-SFBILOAD-NEXT: ret
;
-; RV64I-SFBILOAD-LABEL: test_i64_1:
+; RV64I-SFBILOAD-LABEL: test_i64_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
   %addr = getelementptr i64, ptr %base, i64 4   ; compute base + 4 elements (byte offset 32)
%val = load i64, ptr %addr ; load 64-bit value
+ store i64 %c, ptr %base1
%res = select i1 %x, i64 %val, i64 %b
ret i64 %res
}
+
From b6b2619ec3ec8d0f8af7d9aab92280c346f193c6 Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Thu, 4 Dec 2025 14:20:44 +0530
Subject: [PATCH 05/11] fixup! Add support for LD and LWU and other test cases
Change-Id: Ie893377a347c6be3f39868cced6c099950b17bdd
---
.../Target/RISCV/RISCVExpandPseudoInsts.cpp | 7 +-
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 7 +-
llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 2 +
...-forward-branch-opt-load-atomic-acquire.ll | 5379 +++++++++++++++++
...orward-branch-opt-load-atomic-monotonic.ll | 5379 +++++++++++++++++
...-forward-branch-opt-load-atomic-seq_cst.ll | 5379 +++++++++++++++++
.../short-forward-branch-opt-load-volatile.ll | 1022 ++++
.../RISCV/short-forward-branch-opt-load.ll | 364 +-
8 files changed, 17440 insertions(+), 99 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
create mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
create mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
create mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 04394c37b6bf6..a18aad25ae745 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -138,6 +138,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCLW:
case RISCV::PseudoCCLHU:
case RISCV::PseudoCCLBU:
+ case RISCV::PseudoCCLWU:
+ case RISCV::PseudoCCLD:
case RISCV::PseudoCCQC_LI:
case RISCV::PseudoCCQC_E_LI:
case RISCV::PseudoCCADDW:
@@ -253,6 +255,8 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCLW: NewOpc = RISCV::LW; break;
case RISCV::PseudoCCLHU: NewOpc = RISCV::LHU; break;
case RISCV::PseudoCCLBU: NewOpc = RISCV::LBU; break;
+ case RISCV::PseudoCCLWU: NewOpc = RISCV::LWU; break;
+ case RISCV::PseudoCCLD: NewOpc = RISCV::LD; break;
case RISCV::PseudoCCQC_LI: NewOpc = RISCV::QC_LI; break;
case RISCV::PseudoCCQC_E_LI: NewOpc = RISCV::QC_E_LI; break;
case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
@@ -290,7 +294,8 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
} else {
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
.add(MI.getOperand(5))
- .add(MI.getOperand(6));
+ .add(MI.getOperand(6))
+ .cloneMemRefs(MI);
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index f5c65410a3eba..b1c2e7e991c3e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -900,6 +900,10 @@ unsigned getLoadPredicatedOpcode(unsigned Opcode) {
return RISCV::PseudoCCLHU;
case RISCV::LW:
return RISCV::PseudoCCLW;
+ case RISCV::LWU:
+ return RISCV::PseudoCCLWU;
+ case RISCV::LD:
+ return RISCV::PseudoCCLD;
default:
return 0;
}
@@ -916,7 +920,8 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
if (!STI.hasShortForwardBranchILoad() ||
(LoadMI.getOpcode() != RISCV::LB && LoadMI.getOpcode() != RISCV::LBU &&
LoadMI.getOpcode() != RISCV::LH && LoadMI.getOpcode() != RISCV::LHU &&
- LoadMI.getOpcode() != RISCV::LW))
+ LoadMI.getOpcode() != RISCV::LW && LoadMI.getOpcode() != RISCV::LWU &&
+ LoadMI.getOpcode() != RISCV::LD))
return nullptr;
MachineRegisterInfo &MRI = MF.getRegInfo();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index e7fca38cf5dbe..bcb81d14ed36f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -138,6 +138,8 @@ def PseudoCCLH : SFBLoad;
def PseudoCCLW : SFBLoad;
def PseudoCCLHU : SFBLoad;
def PseudoCCLBU : SFBLoad;
+def PseudoCCLWU : SFBLoad;
+def PseudoCCLD : SFBLoad;
def PseudoCCADDI : SFBALU_ri;
def PseudoCCANDI : SFBALU_ri;
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
new file mode 100644
index 0000000000000..51f2643c94191
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
@@ -0,0 +1,5379 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-i-load | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-i-load | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
+
+define i32 @test_i8_s_3(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i8_s_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s1, .LBB0_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: .LBB0_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB0_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB0_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s1, .LBB0_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 24
+; RV32I-SFB-NEXT: .LBB0_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s1, .LBB0_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: .LBB0_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z_3(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i8_z_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s1, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: .LBB1_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB1_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB1_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s1, .LBB1_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: zext.b s0, a0
+; RV32I-SFB-NEXT: .LBB1_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s1, .LBB1_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: .LBB1_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s0, a0
+; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_3(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i16_s_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s1, .LBB2_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: .LBB2_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB2_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB2_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s1, .LBB2_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 16
+; RV32I-SFB-NEXT: .LBB2_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB2_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: .LBB2_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4   ; compute base + 4 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_3(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i16_z_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s1, .LBB3_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: .LBB3_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB3_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB3_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s1, .LBB3_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srli s0, a0, 16
+; RV32I-SFB-NEXT: .LBB3_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB3_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: .LBB3_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srli s0, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; address of element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
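+; i32 needs no extension, so once the load is lowered to a __atomic_load_4
+; libcall the select reduces to a conditional move of %b over the call
+; result in all SFB configurations.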
+define i32 @test_i32_3(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i32_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: bnez s1, .LBB4_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: .LBB4_2: # %entry
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 16
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: bnez s1, .LBB4_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB4_2: # %entry
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 16
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_4
+; RV32I-SFB-NEXT: bnez s1, .LBB4_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: .LBB4_2: # %entry
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 16
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_4
+; RV64I-SFB-NEXT: bnez s1, .LBB4_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB4_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_4
+; RV32I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_4
+; RV64I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; address of element 4 (base + 16 bytes)
+ %val = load atomic i32, ptr %addr acquire, align 4 ; load 32-bit value
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
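+; Store variants: an unrelated store of %c to %base1 sits between the atomic
+; load and the select. The SFB checks below show the store scheduled after
+; the predicated extension block.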
+define i32 @test_i8_s_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i8_s_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB5_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: .LBB5_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB5_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB5_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s3, .LBB5_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s2, a0, 24
+; RV32I-SFB-NEXT: .LBB5_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s3, .LBB5_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s2, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; address of element 4 (base + 4 bytes)
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
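+; i8 zero-extension with an intervening store: the zext.b alone is
+; predicated and the store sinks below the short forward branch.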
+define i32 @test_i8_z_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i8_z_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB6_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: .LBB6_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB6_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB6_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s3, .LBB6_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: zext.b s2, a0
+; RV32I-SFB-NEXT: .LBB6_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s3, .LBB6_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s2, a0
+; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; address of element 4 (base + 4 bytes)
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
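+; i16 sign-extension with an intervening store: the slli is hoisted above
+; the branch and only the srai is predicated.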
+define i32 @test_i16_s_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i16_s_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB7_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: .LBB7_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB7_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB7_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s3, .LBB7_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s2, a0, 16
+; RV32I-SFB-NEXT: .LBB7_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB7_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s2, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; address of element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
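+; i16 zero-extension with an intervening store: the slli is hoisted above
+; the branch and only the srli is predicated.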
+define i32 @test_i16_z_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i16_z_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB8_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: .LBB8_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB8_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB8_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s3, .LBB8_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srli s2, a0, 16
+; RV32I-SFB-NEXT: .LBB8_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB8_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: .LBB8_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srli s2, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; address of element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
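+; i32 with an intervening store: no extension is needed, so the select is a
+; conditional move and the store sinks below the branch in the SFB variants.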
+define i32 @test_i32_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i32_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: bnez s3, .LBB9_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: .LBB9_2: # %entry
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 16
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB9_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB9_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 16
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_4
+; RV32I-SFB-NEXT: bnez s3, .LBB9_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: .LBB9_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 16
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_4
+; RV64I-SFB-NEXT: bnez s3, .LBB9_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB9_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_4
+; RV32I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_4
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; advance 4 i32 elements (byte offset 16)
+ %val = load atomic i32, ptr %addr acquire, align 4 ; load 32-bit value
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
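+; Atomic i8 acquire load sign-extended to i64. On RV32 the extension needs two
+; predicated srai ops (low word and high word), so SFB emits two short forward
+; branches; on RV64 the hoisted slli plus one predicated srai is enough.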
+define i64 @test_i8_s_1_3(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i8_s_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s2, .LBB10_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 24
+; RV32I-NEXT: .LBB10_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB10_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB10_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s2, .LBB10_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s1, a0, 24
+; RV32I-SFB-NEXT: .LBB10_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB10_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: .LBB10_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s1, .LBB10_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: .LBB10_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s1, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB10_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
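+; Zero-extended i8 variant: on RV32 the high word becomes a predicated "li 0"
+; and the low word a predicated zext.b of the libcall result.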
+define i64 @test_i8_z_1_3(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i8_z_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s2, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: .LBB11_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB11_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB11_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s2, .LBB11_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB11_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB11_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: zext.b s1, a0
+; RV32I-SFB-NEXT: .LBB11_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s1, .LBB11_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: .LBB11_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s1, a0
+; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB11_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
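+; Atomic i16 acquire load sign-extended to i64, mirroring the i8 case with a
+; 16-bit slli/srai pair.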
+define i64 @test_i16_s_1_3(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i16_s_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s2, .LBB12_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 16
+; RV32I-NEXT: .LBB12_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB12_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB12_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s2, .LBB12_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s1, a0, 16
+; RV32I-SFB-NEXT: .LBB12_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB12_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: .LBB12_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB12_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: .LBB12_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s1, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB12_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; advance 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
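+; Zero-extended i16 variant, using slli/srli in place of slli/srai.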
+define i64 @test_i16_z_1_3(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i16_z_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s2, .LBB13_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: .LBB13_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB13_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB13_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s2, .LBB13_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB13_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB13_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srli s1, a0, 16
+; RV32I-SFB-NEXT: .LBB13_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB13_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: .LBB13_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srli s1, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB13_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; advance 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
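+; The align 2 atomic i32 load is under-aligned, so it expands to the generic
+; __atomic_load libcall with a stack temporary. Reloading that temporary is a
+; plain lw/lwu, which is exactly what short-forward-branch-i-load can predicate:
+; the SFBILOAD variants place the reload itself in the branch shadow, where the
+; plain SFB variants reload unconditionally and predicate a mv instead.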
+define i64 @test_i32_z_1_3(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i32_z_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: beqz s2, .LBB14_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lw s1, 12(sp)
+; RV32I-NEXT: .LBB14_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: beqz s1, .LBB14_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lwu s0, 4(sp)
+; RV64I-NEXT: .LBB14_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: addi a2, sp, 12
+; RV32I-SFB-NEXT: li a3, 2
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 12(sp)
+; RV32I-SFB-NEXT: bnez s2, .LBB14_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB14_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB14_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB14_4: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 2
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s1, .LBB14_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB14_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
+; RV32I-SFBILOAD-NEXT: li a3, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
+; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB14_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
+; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; advance 4 i32 elements (byte offset 16)
+ %val = load atomic i32, ptr %addr acquire, align 2 ; under-aligned 32-bit load: lowers to the generic __atomic_load libcall
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
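+; Atomic i64 acquire load through __atomic_load_8. On RV32 the result comes back
+; in the a0/a1 register pair, so the select needs two predicated mv ops.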
+define i64 @test_i64_1_3(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i64_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: bnez s2, .LBB15_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB15_2: # %entry
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 32
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: bnez s1, .LBB15_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB15_2: # %entry
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: bnez s2, .LBB15_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB15_2: # %entry
+; RV32I-SFB-NEXT: bnez s2, .LBB15_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: .LBB15_4: # %entry
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 32
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_8
+; RV64I-SFB-NEXT: bnez s1, .LBB15_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB15_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: .LBB15_4: # %entry
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_8
+; RV64I-SFBILOAD-NEXT: bnez s1, .LBB15_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 32 (element 4)
+ %val = load atomic i64, ptr %addr acquire, align 8 ; load 64-bit value
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_s_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i8_s_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB16_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 24
+; RV32I-NEXT: .LBB16_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB16_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB16_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s5, .LBB16_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s4, a0, 24
+; RV32I-SFB-NEXT: .LBB16_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB16_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: .LBB16_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s3, .LBB16_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: .LBB16_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s4, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB16_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4 (element 4)
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i8_z_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB17_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: .LBB17_2: # %entry
+; RV32I-NEXT: addi a1, s5, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB17_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB17_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s5, .LBB17_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB17_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB17_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: zext.b s4, a0
+; RV32I-SFB-NEXT: .LBB17_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s3, .LBB17_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: .LBB17_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s4, a0
+; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB17_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4 (element 4)
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i16_s_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB18_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 16
+; RV32I-NEXT: .LBB18_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB18_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB18_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s5, .LBB18_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s4, a0, 16
+; RV32I-SFB-NEXT: .LBB18_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB18_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: .LBB18_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB18_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: .LBB18_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s4, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB18_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 8 (element 4)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i16_z_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB19_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: .LBB19_2: # %entry
+; RV32I-NEXT: addi a1, s5, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB19_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB19_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s5, .LBB19_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB19_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB19_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srli s4, a0, 16
+; RV32I-SFB-NEXT: .LBB19_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB19_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: .LBB19_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srli s4, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB19_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 8 (element 4)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i32_z_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: mv a2, sp
+; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: lw a0, 0(sp)
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: bnez s5, .LBB20_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: .LBB20_2: # %entry
+; RV32I-NEXT: addi a1, s5, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: lwu a0, 4(sp)
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB20_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB20_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: mv a2, sp
+; RV32I-SFB-NEXT: li a3, 2
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 0(sp)
+; RV32I-SFB-NEXT: beqz s5, .LBB20_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB20_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB20_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB20_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 2
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s3, .LBB20_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB20_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: mv a2, sp
+; RV32I-SFBILOAD-NEXT: li a3, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB20_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB20_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB20_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB20_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB20_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB20_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4 elements (byte offset 16)
+ %val = load atomic i32, ptr %addr acquire, align 2 ; under-aligned 32-bit atomic load (lowered to the generic __atomic_load libcall)
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i64_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i64_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: bnez s5, .LBB21_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB21_2: # %entry
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 32
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB21_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB21_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: bnez s5, .LBB21_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB21_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB21_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: .LBB21_4: # %entry
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 32
+; RV64I-SFB-NEXT: li a1, 2
+; RV64I-SFB-NEXT: call __atomic_load_8
+; RV64I-SFB-NEXT: bnez s3, .LBB21_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB21_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB21_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: .LBB21_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV64I-SFBILOAD-NEXT: li a1, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load_8
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB21_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4 elements (byte offset 32)
+ %val = load atomic i64, ptr %addr acquire, align 8 ; load 64-bit value
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
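+; Note: on RV32 the i64 select above splits into two predicated moves, one
+; per register half, guarded by the same condition, and the two sw halves
+; of the 64-bit store bracket the predicated sequence.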
+
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
new file mode 100644
index 0000000000000..c2564e6ac654f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
@@ -0,0 +1,5379 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-i-load | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-i-load | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
+
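+; The prefixes form a matrix: RV32I/RV64I are the plain baselines, the -SFB
+; pairs enable the generic short-forward-branch optimization, and the
+; -SFBILOAD pairs additionally allow loads in the branch shadow. Monotonic
+; atomic loads are expanded to __atomic_load_* libcalls, so the SFB and
+; SFBILOAD output is expected to match here: only the extend or select that
+; follows the call can be predicated, not the load itself.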
+define i32 @test_i8_s_2(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i8_s_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s1, .LBB0_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: .LBB0_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB0_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB0_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s1, .LBB0_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 24
+; RV32I-SFB-NEXT: .LBB0_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s1, .LBB0_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: .LBB0_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
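+; Note how the SFB variants hoist the shift (slli) above the branch and
+; predicate only the arithmetic right shift (srai), leaving a single
+; instruction in the branch shadow.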
+
+define i32 @test_i8_z_2(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i8_z_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s1, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: .LBB1_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB1_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB1_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s1, .LBB1_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: zext.b s0, a0
+; RV32I-SFB-NEXT: .LBB1_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s1, .LBB1_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: .LBB1_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s0, a0
+; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_2(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i16_s_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s1, .LBB2_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: .LBB2_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB2_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB2_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s1, .LBB2_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 16
+; RV32I-SFB-NEXT: .LBB2_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB2_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: .LBB2_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_2(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i16_z_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s1, .LBB3_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: .LBB3_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB3_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB3_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s1, .LBB3_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srli s0, a0, 16
+; RV32I-SFB-NEXT: .LBB3_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB3_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: .LBB3_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srli s0, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_2(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i32_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: bnez s1, .LBB4_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: .LBB4_2: # %entry
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 16
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: bnez s1, .LBB4_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB4_2: # %entry
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 16
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_4
+; RV32I-SFB-NEXT: bnez s1, .LBB4_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: .LBB4_2: # %entry
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 16
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_4
+; RV64I-SFB-NEXT: bnez s1, .LBB4_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB4_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_4
+; RV32I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_4
+; RV64I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4 elements (byte offset 16)
+ %val = load atomic i32, ptr %addr monotonic, align 4 ; load 32-bit value
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
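+; With no extension needed, the word-sized case leaves only the fallback mv
+; in the branch shadow; all four SFB configurations produce the same
+; predicated sequence.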
+
+define i32 @test_i8_s_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i8_s_store_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB5_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: .LBB5_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB5_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB5_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s3, .LBB5_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s2, a0, 24
+; RV32I-SFB-NEXT: .LBB5_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s3, .LBB5_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s2, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute address of element 4 (base + 4 bytes)
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i8_z_store_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB6_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: .LBB6_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB6_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB6_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s3, .LBB6_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: zext.b s2, a0
+; RV32I-SFB-NEXT: .LBB6_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s3, .LBB6_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s2, a0
+; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute address of element 4 (base + 4 bytes)
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i16_s_store_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB7_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: .LBB7_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB7_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB7_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s3, .LBB7_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s2, a0, 16
+; RV32I-SFB-NEXT: .LBB7_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB7_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s2, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute address of element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i16_z_store_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB8_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: .LBB8_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB8_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB8_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s3, .LBB8_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srli s2, a0, 16
+; RV32I-SFB-NEXT: .LBB8_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB8_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: .LBB8_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srli s2, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute address of element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i32_store_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: bnez s3, .LBB9_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: .LBB9_2: # %entry
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_store_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 16
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB9_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB9_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_store_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 16
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_4
+; RV32I-SFB-NEXT: bnez s3, .LBB9_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: .LBB9_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_store_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 16
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_4
+; RV64I-SFB-NEXT: bnez s3, .LBB9_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB9_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_store_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_4
+; RV32I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_store_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_4
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; compute address of element 4 (base + 16 bytes)
+ %val = load atomic i32, ptr %addr monotonic, align 4 ; load 32-bit value
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
+define i64 @test_i8_s_1_2(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i8_s_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s2, .LBB10_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 24
+; RV32I-NEXT: .LBB10_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB10_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB10_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s2, .LBB10_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s1, a0, 24
+; RV32I-SFB-NEXT: .LBB10_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB10_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: .LBB10_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s1, .LBB10_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: .LBB10_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s1, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB10_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute address of element 4 (base + 4 bytes)
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_z_1_2(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i8_z_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s2, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: .LBB11_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB11_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB11_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s2, .LBB11_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB11_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB11_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: zext.b s1, a0
+; RV32I-SFB-NEXT: .LBB11_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s1, .LBB11_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: .LBB11_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s1, a0
+; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB11_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_1_2(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i16_s_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s2, .LBB12_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 16
+; RV32I-NEXT: .LBB12_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB12_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB12_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s2, .LBB12_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s1, a0, 16
+; RV32I-SFB-NEXT: .LBB12_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB12_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: .LBB12_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB12_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: .LBB12_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s1, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB12_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute address of element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_1_2(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i16_z_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s2, .LBB13_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: .LBB13_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB13_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB13_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s2, .LBB13_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB13_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB13_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srli s1, a0, 16
+; RV32I-SFB-NEXT: .LBB13_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB13_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: .LBB13_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srli s1, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB13_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute address of element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i32_z_1_2(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i32_z_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: beqz s2, .LBB14_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lw s1, 12(sp)
+; RV32I-NEXT: .LBB14_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 0
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: beqz s1, .LBB14_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lwu s0, 4(sp)
+; RV64I-NEXT: .LBB14_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: addi a2, sp, 12
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 12(sp)
+; RV32I-SFB-NEXT: bnez s2, .LBB14_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB14_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB14_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB14_4: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 0
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s1, .LBB14_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB14_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
+; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB14_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
+; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; compute address of element 4 (base + 16 bytes)
+ %val = load atomic i32, ptr %addr monotonic, align 2 ; load 32-bit value; align 2 under-aligns the i32, so this lowers to the generic __atomic_load libcall
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i64_1_2(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i64_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: bnez s2, .LBB15_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB15_2: # %entry
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 32
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: bnez s1, .LBB15_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB15_2: # %entry
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: bnez s2, .LBB15_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB15_2: # %entry
+; RV32I-SFB-NEXT: bnez s2, .LBB15_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: .LBB15_4: # %entry
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 32
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_8
+; RV64I-SFB-NEXT: bnez s1, .LBB15_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB15_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: .LBB15_4: # %entry
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_8
+; RV64I-SFBILOAD-NEXT: bnez s1, .LBB15_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; compute address of element 4 (base + 32 bytes)
+ %val = load atomic i64, ptr %addr monotonic, align 8 ; load 64-bit value
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_s_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i8_s_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB16_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 24
+; RV32I-NEXT: .LBB16_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB16_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB16_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s5, .LBB16_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s4, a0, 24
+; RV32I-SFB-NEXT: .LBB16_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB16_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: .LBB16_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s3, .LBB16_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: .LBB16_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s4, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB16_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i8_z_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB17_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: .LBB17_2: # %entry
+; RV32I-NEXT: addi a1, s5, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB17_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB17_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s5, .LBB17_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB17_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB17_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: zext.b s4, a0
+; RV32I-SFB-NEXT: .LBB17_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s3, .LBB17_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: .LBB17_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s4, a0
+; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB17_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4 elements (byte offset 4)
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i16_s_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB18_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 16
+; RV32I-NEXT: .LBB18_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB18_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB18_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s5, .LBB18_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s4, a0, 16
+; RV32I-SFB-NEXT: .LBB18_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB18_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: .LBB18_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB18_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: .LBB18_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s4, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB18_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i16_z_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB19_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: .LBB19_2: # %entry
+; RV32I-NEXT: addi a1, s5, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB19_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB19_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s5, .LBB19_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB19_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB19_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srli s4, a0, 16
+; RV32I-SFB-NEXT: .LBB19_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB19_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: .LBB19_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srli s4, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB19_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i32_z_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: mv a2, sp
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: lw a0, 0(sp)
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: bnez s5, .LBB20_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: .LBB20_2: # %entry
+; RV32I-NEXT: addi a1, s5, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 0
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: lwu a0, 4(sp)
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB20_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB20_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: mv a2, sp
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 0(sp)
+; RV32I-SFB-NEXT: beqz s5, .LBB20_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB20_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB20_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB20_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 0
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s3, .LBB20_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB20_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: mv a2, sp
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB20_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB20_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB20_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB20_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB20_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB20_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4 elements (byte offset 16)
+  %val = load atomic i32, ptr %addr monotonic, align 2 ; under-aligned 32-bit load, lowered via the generic __atomic_load libcall
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i64_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i64_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: bnez s5, .LBB21_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB21_2: # %entry
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 32
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB21_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB21_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: bnez s5, .LBB21_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB21_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB21_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: .LBB21_4: # %entry
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 32
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_8
+; RV64I-SFB-NEXT: bnez s3, .LBB21_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB21_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB21_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: .LBB21_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_8
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB21_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4 elements (byte offset 32)
+ %val = load atomic i64, ptr %addr monotonic, align 8 ; load 64-bit value
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
+
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
new file mode 100644
index 0000000000000..9308fa38d95bd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
@@ -0,0 +1,5379 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-i-load | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-i-load | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
+
+define i32 @test_i8_s_4(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i8_s_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s1, .LBB0_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: .LBB0_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB0_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB0_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s1, .LBB0_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 24
+; RV32I-SFB-NEXT: .LBB0_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s1, .LBB0_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: .LBB0_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
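For contrast, the new load pseudos (PseudoCCLB and friends) target plain loads; in this file every load is a seq_cst atomic that becomes a __atomic_load_1 libcall, so the load itself can never be predicated, only the extension code lands in the branch shadow, and the SFB and SFBILOAD checks therefore coincide. A minimal non-atomic sketch of the shape the feature is aimed at (hypothetical function name, not from the patch):

define i32 @sketch_i8_s_nonatomic(ptr %base, i1 %x, i32 %b) {
entry:
  %addr = getelementptr i8, ptr %base, i32 4
  %val = load i8, ptr %addr, align 1   ; plain lb: a candidate for PseudoCCLB
  %ext = sext i8 %val to i32
  %res = select i1 %x, i32 %ext, i32 %b
  ret i32 %res
}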
+
+define i32 @test_i8_z_4(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i8_z_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s1, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: .LBB1_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB1_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB1_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s1, .LBB1_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: zext.b s0, a0
+; RV32I-SFB-NEXT: .LBB1_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s1, .LBB1_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: .LBB1_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s0, a0
+; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
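The zext cases also illustrate the one-instruction budget of a short forward branch: `zext i8` lowers to a single zext.b (an alias of `andi rd, rs, 255`), so it fits in the shadow as-is, whereas the sext cases need an slli/srai pair, with the slli hoisted above the branch and only the final shift predicated. A sketch of the single-slot form (hypothetical name):

define i32 @sketch_one_slot(ptr %base, i1 %x, i32 %b) {
entry:
  %val = load i8, ptr %base, align 1
  %ext = zext i8 %val to i32           ; one instruction: fits the branch shadow
  %res = select i1 %x, i32 %ext, i32 %b
  ret i32 %res
}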
+
+define i32 @test_i16_s_4(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i16_s_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s1, .LBB2_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: .LBB2_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB2_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB2_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s1, .LBB2_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 16
+; RV32I-SFB-NEXT: .LBB2_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB2_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: .LBB2_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
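Note the indexing convention at work here: getelementptr scales the index by the element size, so index 4 on an i16 base is byte offset 8, matching the `addi a0, a0, 8` in the checks above (and `addi a0, a0, 16`/`addi a0, a0, 32` in the i32/i64 tests). A tiny sketch of the address math (hypothetical name):

define ptr @sketch_gep_scaling(ptr %base) {
entry:
  ; i8 index 4 -> +4 bytes, i16 index 4 -> +8 bytes,
  ; i32 index 4 -> +16 bytes, i64 index 4 -> +32 bytes
  %p = getelementptr i16, ptr %base, i32 4 ; same address as an i8 GEP with index 8
  ret ptr %p
}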
+
+define i32 @test_i16_z_4(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i16_z_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s1, .LBB3_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: .LBB3_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB3_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB3_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s1, .LBB3_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srli s0, a0, 16
+; RV32I-SFB-NEXT: .LBB3_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB3_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: .LBB3_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srli s0, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_4(ptr %base, i1 %x, i32 %b) {
+; RV32I-LABEL: test_i32_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s1, a1, 1
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: bnez s1, .LBB4_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: .LBB4_2: # %entry
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 16
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: bnez s1, .LBB4_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB4_2: # %entry
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: andi s1, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 16
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_4
+; RV32I-SFB-NEXT: bnez s1, .LBB4_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: .LBB4_2: # %entry
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 16
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_4
+; RV64I-SFB-NEXT: bnez s1, .LBB4_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB4_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_4
+; RV32I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_4
+; RV64I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4 i32 elements (byte offset 16)
+ %val = load atomic i32, ptr %addr seq_cst, align 4 ; load 32-bit value
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
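With i32 there is no extension to fold, so the only candidate for predication is the register select itself: the baseline and SFB outputs coincide here, each branching over a single `mv a0, s0`, and SFBILOAD adds nothing because the loaded value arrives via the libcall. Reduced to its core (hypothetical name):

define i32 @sketch_plain_select(i32 %a, i1 %x, i32 %b) {
entry:
  %res = select i1 %x, i32 %a, i32 %b  ; lowers to a branch over one mv under SFB
  ret i32 %res
}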
+
+define i32 @test_i8_s_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i8_s_store_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB5_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: .LBB5_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB5_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB5_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s3, .LBB5_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s2, a0, 24
+; RV32I-SFB-NEXT: .LBB5_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s3, .LBB5_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s2, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
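In the store variants, the `sw` of %c is independent of the select, so the scheduler is free to place it on either side of the branch shadow: the baseline output stores before the branch, while the SFB forms sink it below the predicated block so the shadow holds only the predicated shift. The IR shape, for reference (hypothetical name):

define i32 @sketch_store_sink(ptr %p, i1 %x, i32 %b, ptr %out, i32 %c) {
entry:
  %val = load i8, ptr %p, align 1
  %ext = sext i8 %val to i32
  store i32 %c, ptr %out               ; independent store: may move across the shadow
  %res = select i1 %x, i32 %ext, i32 %b
  ret i32 %res
}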
+
+define i32 @test_i8_z_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i8_z_store_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB6_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: .LBB6_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB6_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB6_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s3, .LBB6_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: zext.b s2, a0
+; RV32I-SFB-NEXT: .LBB6_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s3, .LBB6_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s2, a0
+; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i16_s_store_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB7_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: .LBB7_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB7_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB7_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s3, .LBB7_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s2, a0, 16
+; RV32I-SFB-NEXT: .LBB7_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB7_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s2, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4   ; compute base + 4 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i16_z_store_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB8_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: .LBB8_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB8_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB8_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s3, .LBB8_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srli s2, a0, 16
+; RV32I-SFB-NEXT: .LBB8_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB8_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: .LBB8_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srli s2, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4   ; compute base + 4 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+; RV32I-LABEL: test_i32_store_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: andi s3, a1, 1
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: bnez s3, .LBB9_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: .LBB9_2: # %entry
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_store_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 16
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB9_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB9_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_store_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: andi s3, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 16
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_4
+; RV32I-SFB-NEXT: bnez s3, .LBB9_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: .LBB9_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_store_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 16
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_4
+; RV64I-SFB-NEXT: bnez s3, .LBB9_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB9_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_store_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_4
+; RV32I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_store_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_4
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i32 4   ; compute base + 4 elements (byte offset 16)
+ %val = load atomic i32, ptr %addr seq_cst, align 4 ; load 32-bit value
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
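+; NOTE: on plain rv32i/rv64i (without the A extension) these seq_cst atomic
+; loads lower to __atomic_load_* libcalls, so the loaded value only becomes
+; available in a0 after the call returns and the load itself cannot be
+; predicated; that is why the SFBILOAD output above matches the plain SFB
+; output. A minimal non-atomic sketch (hypothetical, not exercised by the
+; checks in this file) where the load itself would be a candidate for
+; folding into the short forward branch:
+;
+;   define i32 @sfb_load_sketch(ptr %p, i1 %x, i32 %b) {
+;   entry:
+;     %v = load i32, ptr %p, align 4         ; plain lw
+;     %res = select i1 %x, i32 %v, i32 %b    ; short-forward-branch candidate
+;     ret i32 %res
+;   }
+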
+define i64 @test_i8_s_1_4(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i8_s_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s2, .LBB10_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 24
+; RV32I-NEXT: .LBB10_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB10_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB10_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s2, .LBB10_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s1, a0, 24
+; RV32I-SFB-NEXT: .LBB10_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB10_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: .LBB10_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s1, .LBB10_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: .LBB10_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s1, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB10_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
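+; NOTE: returning a sign-extended i64 on RV32 splits the result across the
+; a0/a1 register pair, so the SFB expansion above guards each half with its
+; own short forward branch (.LBB10_2 selects the low word via srai 24,
+; .LBB10_4 the high word via srai 31) instead of a single conditional block.
+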
+define i64 @test_i8_z_1_4(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i8_z_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s2, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: .LBB11_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB11_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB11_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s2, .LBB11_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB11_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB11_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: zext.b s1, a0
+; RV32I-SFB-NEXT: .LBB11_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s1, .LBB11_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: .LBB11_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s1, a0
+; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB11_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_1_4(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i16_s_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s2, .LBB12_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 16
+; RV32I-NEXT: .LBB12_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB12_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB12_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s2, .LBB12_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s1, a0, 16
+; RV32I-SFB-NEXT: .LBB12_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB12_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: .LBB12_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB12_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: .LBB12_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s1, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB12_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4   ; compute base + 4 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_1_4(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i16_z_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s2, .LBB13_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: .LBB13_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB13_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB13_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s2, .LBB13_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB13_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB13_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srli s1, a0, 16
+; RV32I-SFB-NEXT: .LBB13_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB13_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: .LBB13_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srli s1, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB13_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; i16 element 4 = byte offset 8
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
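+; Note: for the zext-to-i64 select, RV32 needs two predicated operations
+; (zeroing the high word and the srli that masks the low word), one short
+; forward branch each; the value predicated is the libcall result, not a load.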
+
+define i64 @test_i32_z_1_4(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i32_z_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: beqz s2, .LBB14_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lw s1, 12(sp)
+; RV32I-NEXT: .LBB14_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: beqz s1, .LBB14_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lwu s0, 4(sp)
+; RV64I-NEXT: .LBB14_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: addi a2, sp, 12
+; RV32I-SFB-NEXT: li a3, 5
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 12(sp)
+; RV32I-SFB-NEXT: bnez s2, .LBB14_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB14_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB14_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB14_4: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 5
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s1, .LBB14_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB14_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
+; RV32I-SFBILOAD-NEXT: li a3, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
+; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB14_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
+; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; i32 element 4 = byte offset 16
+ %val = load atomic i32, ptr %addr seq_cst, align 2 ; under-aligned 32-bit atomic load
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
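+; Note: align 2 under-aligns the atomic i32, so it goes through the generic
+; __atomic_load libcall with a stack temporary; the lw/lwu that reloads that
+; temporary is the instruction predicated under short-forward-branch-i-load.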
+
+define i64 @test_i64_1_4(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i64_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s2, a1, 1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: bnez s2, .LBB15_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB15_2: # %entry
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s1, a1, 1
+; RV64I-NEXT: addi a0, a0, 32
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: bnez s1, .LBB15_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB15_2: # %entry
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: andi s2, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: bnez s2, .LBB15_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB15_2: # %entry
+; RV32I-SFB-NEXT: bnez s2, .LBB15_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: .LBB15_4: # %entry
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: andi s1, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 32
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_8
+; RV64I-SFB-NEXT: bnez s1, .LBB15_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB15_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: .LBB15_4: # %entry
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_8
+; RV64I-SFBILOAD-NEXT: bnez s1, .LBB15_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; i64 element 4 = byte offset 32
+ %val = load atomic i64, ptr %addr seq_cst, align 8 ; load 64-bit value
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
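+; Note: the naturally aligned atomic i64 becomes a __atomic_load_8 libcall
+; whose result is already in registers, so only conditional moves remain and
+; the SFBILOAD output matches plain SFB here.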
+
+define i64 @test_i8_s_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i8_s_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB16_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 24
+; RV32I-NEXT: .LBB16_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB16_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB16_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s5, .LBB16_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s4, a0, 24
+; RV32I-SFB-NEXT: .LBB16_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB16_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: .LBB16_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s3, .LBB16_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: .LBB16_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s4, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB16_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; i8 element 4 = byte offset 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
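+; Note: the unrelated i64 store is independent of the select; in the SFB
+; outputs above it is scheduled below the predicated blocks, so it does not
+; prevent forming the short forward branches around the sign-extension.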
+
+define i64 @test_i8_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i8_z_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB17_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: .LBB17_2: # %entry
+; RV32I-NEXT: addi a1, s5, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB17_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB17_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s5, .LBB17_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB17_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB17_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: zext.b s4, a0
+; RV32I-SFB-NEXT: .LBB17_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s3, .LBB17_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: .LBB17_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s4, a0
+; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB17_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; i8 element 4 = byte offset 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
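+; Note: zext variant of the store test; zext.b is the predicated operation
+; and the i64 store is again sunk below the short forward branches.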
+
+define i64 @test_i16_s_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i16_s_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB18_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 16
+; RV32I-NEXT: .LBB18_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB18_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB18_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s5, .LBB18_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s4, a0, 16
+; RV32I-SFB-NEXT: .LBB18_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB18_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: .LBB18_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB18_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: .LBB18_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s4, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB18_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
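+
+; The seq_cst i16 load above has to go through the __atomic_load_2 libcall,
+; so only the sign-extension is folded: on RV32 a shared `slli` followed by
+; two predicated `srai` (low and high word), on RV64 a single predicated
+; `srai`. A plain load in the same position, e.g. (an illustrative sketch,
+; not part of the autogenerated checks):
+;
+;   %v = load i16, ptr %addr
+;   %e = sext i16 %v to i64
+;   %r = select i1 %x, i64 %e, i64 %b
+;
+; is the shape the PseudoCCLH fold is aimed at.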
+
+define i64 @test_i16_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i16_z_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB19_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: .LBB19_2: # %entry
+; RV32I-NEXT: addi a1, s5, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB19_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB19_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s5, .LBB19_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB19_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB19_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srli s4, a0, 16
+; RV32I-SFB-NEXT: .LBB19_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB19_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: .LBB19_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srli s4, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB19_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
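+
+; The zero-extending variant predicates `srli` for the low word and a
+; constant-zero `li` for the high word, one short forward branch per half
+; on RV32; RV64 needs only the single predicated `srli`.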
+
+define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i32_z_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: mv a2, sp
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: lw a0, 0(sp)
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: bnez s5, .LBB20_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: .LBB20_2: # %entry
+; RV32I-NEXT: addi a1, s5, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: lwu a0, 4(sp)
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB20_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB20_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: mv a2, sp
+; RV32I-SFB-NEXT: li a3, 5
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 0(sp)
+; RV32I-SFB-NEXT: beqz s5, .LBB20_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB20_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB20_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB20_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 5
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s3, .LBB20_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB20_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: mv a2, sp
+; RV32I-SFBILOAD-NEXT: li a3, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB20_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB20_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB20_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB20_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB20_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB20_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
+  %val = load atomic i32, ptr %addr seq_cst, align 2 ; load 32-bit value (under-aligned)
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
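+
+; The `align 2` makes this i32 atomic load under-aligned, so it lowers to
+; the generic `__atomic_load(size, ptr, ret, order)` libcall: size 4, a
+; stack slot for the result, and order 5 (seq_cst). The reload from the
+; stack slot stays unconditional; only the select is folded into the short
+; forward branch.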
+
+define i64 @test_i64_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i64_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: .cfi_offset s4, -24
+; RV32I-NEXT: .cfi_offset s5, -28
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: andi s5, a1, 1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: bnez s5, .LBB21_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB21_2: # %entry
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: .cfi_restore s1
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: andi s3, a1, 1
+; RV64I-NEXT: addi a0, a0, 32
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB21_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB21_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: .cfi_restore s1
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: .cfi_offset ra, -4
+; RV32I-SFB-NEXT: .cfi_offset s0, -8
+; RV32I-SFB-NEXT: .cfi_offset s1, -12
+; RV32I-SFB-NEXT: .cfi_offset s2, -16
+; RV32I-SFB-NEXT: .cfi_offset s3, -20
+; RV32I-SFB-NEXT: .cfi_offset s4, -24
+; RV32I-SFB-NEXT: .cfi_offset s5, -28
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: andi s5, a1, 1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: bnez s5, .LBB21_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB21_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB21_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: .LBB21_4: # %entry
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: .cfi_restore ra
+; RV32I-SFB-NEXT: .cfi_restore s0
+; RV32I-SFB-NEXT: .cfi_restore s1
+; RV32I-SFB-NEXT: .cfi_restore s2
+; RV32I-SFB-NEXT: .cfi_restore s3
+; RV32I-SFB-NEXT: .cfi_restore s4
+; RV32I-SFB-NEXT: .cfi_restore s5
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: .cfi_offset ra, -8
+; RV64I-SFB-NEXT: .cfi_offset s0, -16
+; RV64I-SFB-NEXT: .cfi_offset s1, -24
+; RV64I-SFB-NEXT: .cfi_offset s2, -32
+; RV64I-SFB-NEXT: .cfi_offset s3, -40
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: andi s3, a1, 1
+; RV64I-SFB-NEXT: addi a0, a0, 32
+; RV64I-SFB-NEXT: li a1, 5
+; RV64I-SFB-NEXT: call __atomic_load_8
+; RV64I-SFB-NEXT: bnez s3, .LBB21_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB21_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: .cfi_restore ra
+; RV64I-SFB-NEXT: .cfi_restore s0
+; RV64I-SFB-NEXT: .cfi_restore s1
+; RV64I-SFB-NEXT: .cfi_restore s2
+; RV64I-SFB-NEXT: .cfi_restore s3
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
+; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
+; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
+; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
+; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
+; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
+; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB21_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: .LBB21_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: .cfi_restore ra
+; RV32I-SFBILOAD-NEXT: .cfi_restore s0
+; RV32I-SFBILOAD-NEXT: .cfi_restore s1
+; RV32I-SFBILOAD-NEXT: .cfi_restore s2
+; RV32I-SFBILOAD-NEXT: .cfi_restore s3
+; RV32I-SFBILOAD-NEXT: .cfi_restore s4
+; RV32I-SFBILOAD-NEXT: .cfi_restore s5
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
+; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
+; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
+; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
+; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV64I-SFBILOAD-NEXT: li a1, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load_8
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB21_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: .cfi_restore ra
+; RV64I-SFBILOAD-NEXT: .cfi_restore s0
+; RV64I-SFBILOAD-NEXT: .cfi_restore s1
+; RV64I-SFBILOAD-NEXT: .cfi_restore s2
+; RV64I-SFBILOAD-NEXT: .cfi_restore s3
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i64, ptr %addr seq_cst, align 8 ; load 64-bit value
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
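+
+; The naturally aligned i64 load becomes an __atomic_load_8 libcall and the
+; value arrives in registers, so there is no load left to predicate: RV64
+; folds a single `mv`, while RV32 needs two short forward branches, one per
+; 32-bit half of the i64 select.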
+
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
new file mode 100644
index 0000000000000..ebdf25c66fd77
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
@@ -0,0 +1,1022 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-i-load | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-i-load | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
+
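+; These tests check that a volatile load is never speculated or predicated:
+; the volatile load must execute unconditionally on every path, while the
+; non-volatile load feeding the select may still be folded into the branch
+; shadow under +short-forward-branch-i-load.
+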
+define i32 @test_i8_s_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
+; RV32I-LABEL: test_i8_s_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a4, 4(a0)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB0_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a4, a2
+; RV32I-NEXT: .LBB0_2: # %entry
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a4, 4(a0)
+; RV64I-NEXT: lw a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB0_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB0_2: # %entry
+; RV64I-NEXT: addw a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: lw a3, 0(a3)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB0_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB0_2: # %entry
+; RV32I-SFB-NEXT: add a0, a0, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: lw a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB0_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB0_2: # %entry
+; RV64I-SFB-NEXT: addw a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lb a2, 4(a0)
+; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %val1 = load volatile i32, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ %res1 = add i32 %res, %val1
+ ret i32 %res1
+}
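+
+; Under +short-forward-branch-i-load the select's non-volatile `lb` becomes
+; a PseudoCCLB, which expands to a conditional branch over the load, roughly
+; (a sketch of the expansion, not verbatim output):
+;
+;   beqz a1, 1f        # condition false: keep %b in a2
+;   lb   a2, 4(a0)     # condition true: the load overwrites a2
+; 1:
+;   add  a0, a2, a3
+;
+; The volatile `lw` is kept ahead of the branch and always executes.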
+
+define i32 @test_i8_z_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
+; RV32I-LABEL: test_i8_z_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a4, 4(a0)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a4, a2
+; RV32I-NEXT: .LBB1_2: # %entry
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a4, 4(a0)
+; RV64I-NEXT: lw a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB1_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB1_2: # %entry
+; RV64I-NEXT: addw a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: lw a3, 0(a3)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB1_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB1_2: # %entry
+; RV32I-SFB-NEXT: add a0, a0, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: lw a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB1_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB1_2: # %entry
+; RV64I-SFB-NEXT: addw a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %val1 = load volatile i32, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ %res1 = add i32 %res, %val1
+ ret i32 %res1
+}
+
+define i32 @test_i16_s_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
+; RV32I-LABEL: test_i16_s_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a4, 8(a0)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB2_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a4, a2
+; RV32I-NEXT: .LBB2_2: # %entry
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a4, 8(a0)
+; RV64I-NEXT: lw a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB2_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB2_2: # %entry
+; RV64I-NEXT: addw a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: lw a3, 0(a3)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB2_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB2_2: # %entry
+; RV32I-SFB-NEXT: add a0, a0, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: lw a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB2_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB2_2: # %entry
+; RV64I-SFB-NEXT: addw a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lh a2, 8(a0)
+; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %val1 = load volatile i32, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ %res1 = add i32 %res, %val1
+ ret i32 %res1
+}
+
+define i32 @test_i16_z_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
+; RV32I-LABEL: test_i16_z_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a4, 8(a0)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB3_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a4, a2
+; RV32I-NEXT: .LBB3_2: # %entry
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a4, 8(a0)
+; RV64I-NEXT: lw a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB3_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB3_2: # %entry
+; RV64I-NEXT: addw a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: lw a3, 0(a3)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB3_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB3_2: # %entry
+; RV32I-SFB-NEXT: add a0, a0, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: lw a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB3_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB3_2: # %entry
+; RV64I-SFB-NEXT: addw a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %val1 = load volatile i32, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ %res1 = add i32 %res, %val1
+ ret i32 %res1
+}
+
+define i32 @test_i32_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
+; RV32I-LABEL: test_i32_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a4, 16(a0)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB4_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a4, a2
+; RV32I-NEXT: .LBB4_2: # %entry
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lw a4, 16(a0)
+; RV64I-NEXT: lw a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB4_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB4_2: # %entry
+; RV64I-NEXT: addw a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: lw a3, 0(a3)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB4_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB4_2: # %entry
+; RV32I-SFB-NEXT: add a0, a0, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: lw a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB4_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB4_2: # %entry
+; RV64I-SFB-NEXT: addw a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB4_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw a2, 16(a0)
+; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB4_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lw a2, 16(a0)
+; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4
+ %val = load i32, ptr %addr ; load 32-bit value
+ %val1 = load volatile i32, ptr %base1
+ %res = select i1 %x, i32 %val, i32 %b
+ %res1 = add i32 %res, %val1
+ ret i32 %res1
+}
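+
+; Contrast with plain +short-forward-branch-opt above: SFB executes the load
+; unconditionally and predicates a `mv` of its result, whereas SFB-ILOAD
+; predicates the load itself, so it only executes when its value is actually
+; selected.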
+
+define i64 @test_i8_s_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+; RV32I-LABEL: test_i8_s_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a6, 4(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB5_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: j .LBB5_3
+; RV32I-NEXT: .LBB5_2:
+; RV32I-NEXT: srai a3, a6, 31
+; RV32I-NEXT: .LBB5_3: # %entry
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: sltu a1, a0, a6
+; RV32I-NEXT: add a3, a3, a5
+; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a4, 4(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB5_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB5_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: lw a5, 4(a4)
+; RV32I-SFB-NEXT: lw a4, 0(a4)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB5_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB5_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB5_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB5_4: # %entry
+; RV32I-SFB-NEXT: add a0, a2, a4
+; RV32I-SFB-NEXT: sltu a1, a0, a2
+; RV32I-SFB-NEXT: add a3, a3, a5
+; RV32I-SFB-NEXT: add a1, a3, a1
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB5_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB5_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB5_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB5_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
+; RV32I-SFBILOAD-NEXT: add a3, a3, a5
+; RV32I-SFBILOAD-NEXT: add a1, a3, a1
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB5_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %val1 = load volatile i64, ptr %base1
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
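+
+; The sign-extended i8 feeds both halves of the i64 result on RV32, so the
+; RV32 SFB-ILOAD output keeps the `lb` unconditional and predicates the two
+; dependent instructions instead; on RV64 the load has a single use and is
+; folded into the branch shadow.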
+
+define i64 @test_i8_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+; RV32I-LABEL: test_i8_z_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a6, 4(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB6_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: .LBB6_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: sltu a2, a0, a6
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a4, 4(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB6_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB6_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a5, 4(a0)
+; RV32I-SFB-NEXT: lw a6, 4(a4)
+; RV32I-SFB-NEXT: lw a0, 0(a4)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB6_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a5, a2
+; RV32I-SFB-NEXT: .LBB6_2: # %entry
+; RV32I-SFB-NEXT: add a0, a5, a0
+; RV32I-SFB-NEXT: sltu a2, a0, a5
+; RV32I-SFB-NEXT: bnez a1, .LBB6_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: add a6, a6, a3
+; RV32I-SFB-NEXT: .LBB6_4: # %entry
+; RV32I-SFB-NEXT: add a1, a6, a2
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB6_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB6_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a2, a0, a2
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: add a5, a5, a3
+; RV32I-SFBILOAD-NEXT: .LBB6_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a1, a5, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB6_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
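+ ; the volatile load below must not be folded into the short forward branch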
+ %val1 = load volatile i64, ptr %base1
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
+define i64 @test_i16_s_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+; RV32I-LABEL: test_i16_s_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a6, 8(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB7_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: j .LBB7_3
+; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: srai a3, a6, 31
+; RV32I-NEXT: .LBB7_3: # %entry
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: sltu a1, a0, a6
+; RV32I-NEXT: add a3, a3, a5
+; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a4, 8(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB7_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB7_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: lw a5, 4(a4)
+; RV32I-SFB-NEXT: lw a4, 0(a4)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB7_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB7_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB7_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB7_4: # %entry
+; RV32I-SFB-NEXT: add a0, a2, a4
+; RV32I-SFB-NEXT: sltu a1, a0, a2
+; RV32I-SFB-NEXT: add a3, a3, a5
+; RV32I-SFB-NEXT: add a1, a3, a1
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB7_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB7_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB7_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB7_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
+; RV32I-SFBILOAD-NEXT: add a3, a3, a5
+; RV32I-SFBILOAD-NEXT: add a1, a3, a1
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB7_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 elements (byte offset 8)
+ %val = load i16, ptr %addr ; load 16-bit value
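+ ; the volatile load below must not be folded into the short forward branch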
+ %val1 = load volatile i64, ptr %base1
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
+define i64 @test_i16_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+; RV32I-LABEL: test_i16_z_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a6, 8(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB8_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: .LBB8_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: sltu a2, a0, a6
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a4, 8(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB8_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB8_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a5, 8(a0)
+; RV32I-SFB-NEXT: lw a6, 4(a4)
+; RV32I-SFB-NEXT: lw a0, 0(a4)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB8_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a5, a2
+; RV32I-SFB-NEXT: .LBB8_2: # %entry
+; RV32I-SFB-NEXT: add a0, a5, a0
+; RV32I-SFB-NEXT: sltu a2, a0, a5
+; RV32I-SFB-NEXT: bnez a1, .LBB8_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: add a6, a6, a3
+; RV32I-SFB-NEXT: .LBB8_4: # %entry
+; RV32I-SFB-NEXT: add a1, a6, a2
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB8_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB8_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB8_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a2, a0, a2
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: add a5, a5, a3
+; RV32I-SFBILOAD-NEXT: .LBB8_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a1, a5, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB8_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 elements (byte offset 8)
+ %val = load i16, ptr %addr ; load 16-bit value
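+ ; the volatile load below must not be folded into the short forward branch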
+ %val1 = load volatile i64, ptr %base1
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
+define i64 @test_i32_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+; RV32I-LABEL: test_i32_z_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a6, 16(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB9_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: .LBB9_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: sltu a2, a0, a6
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lwu a4, 16(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB9_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB9_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a5, 16(a0)
+; RV32I-SFB-NEXT: lw a6, 4(a4)
+; RV32I-SFB-NEXT: lw a0, 0(a4)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB9_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a5, a2
+; RV32I-SFB-NEXT: .LBB9_2: # %entry
+; RV32I-SFB-NEXT: add a0, a5, a0
+; RV32I-SFB-NEXT: sltu a2, a0, a5
+; RV32I-SFB-NEXT: bnez a1, .LBB9_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: add a6, a6, a3
+; RV32I-SFB-NEXT: .LBB9_4: # %entry
+; RV32I-SFB-NEXT: add a1, a6, a2
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lwu a0, 16(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB9_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB9_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB9_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw a2, 16(a0)
+; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a2, a0, a2
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: add a5, a5, a3
+; RV32I-SFBILOAD-NEXT: .LBB9_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a1, a5, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB9_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lwu a2, 16(a0)
+; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4 elements (byte offset 16)
+ %val = load i32, ptr %addr ; load 32-bit value
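+ ; the volatile load below must not be folded into the short forward branch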
+ %val1 = load volatile i64, ptr %base1
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
+define i64 @test_i64_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+; RV32I-LABEL: test_i64_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a7, 32(a0)
+; RV32I-NEXT: lw a6, 36(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB10_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a3
+; RV32I-NEXT: mv a7, a2
+; RV32I-NEXT: .LBB10_2: # %entry
+; RV32I-NEXT: add a0, a7, a0
+; RV32I-NEXT: sltu a1, a0, a7
+; RV32I-NEXT: add a5, a6, a5
+; RV32I-NEXT: add a1, a5, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: ld a4, 32(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB10_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB10_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a5, 32(a0)
+; RV32I-SFB-NEXT: lw a6, 36(a0)
+; RV32I-SFB-NEXT: lw a7, 4(a4)
+; RV32I-SFB-NEXT: lw a0, 0(a4)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB10_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a6, a3
+; RV32I-SFB-NEXT: .LBB10_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB10_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a5, a2
+; RV32I-SFB-NEXT: .LBB10_4: # %entry
+; RV32I-SFB-NEXT: add a0, a5, a0
+; RV32I-SFB-NEXT: sltu a1, a0, a5
+; RV32I-SFB-NEXT: add a6, a6, a7
+; RV32I-SFB-NEXT: add a1, a6, a1
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB10_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB10_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw a2, 32(a0)
+; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: lw a3, 36(a0)
+; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
+; RV32I-SFBILOAD-NEXT: add a3, a3, a5
+; RV32I-SFBILOAD-NEXT: add a1, a3, a1
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB10_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: ld a2, 32(a0)
+; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4 elements (byte offset 32)
+ %val = load i64, ptr %addr ; load 64-bit value
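+ ; the volatile load below must not be folded into the short forward branch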
+ %val1 = load volatile i64, ptr %base1
+ %res = select i1 %x, i64 %val, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
index 9ed1218cf7fb5..5fc3433458d50 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
@@ -1047,15 +1047,95 @@ entry:
ret i64 %res
}
+define i64 @test_i32_z_1(ptr %base, i1 %x, i64 %b) {
+; RV32I-LABEL: test_i32_z_1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB14_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lw a2, 16(a0)
+; RV32I-NEXT: .LBB14_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB14_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lwu a2, 16(a0)
+; RV64I-NEXT: .LBB14_2: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_1:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB14_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB14_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB14_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB14_4: # %entry
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_1:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lwu a0, 16(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB14_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB14_2: # %entry
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_1:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: andi a4, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a4, .LBB14_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw a2, 16(a0)
+; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: beqz a4, .LBB14_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_1:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB14_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lwu a2, 16(a0)
+; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4 elements (byte offset 16)
+ %val = load i32, ptr %addr ; load 32-bit value
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) {
; RV32I-LABEL: test_i64_1:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: beqz a1, .LBB14_2
+; RV32I-NEXT: beqz a1, .LBB15_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lw a2, 32(a0)
; RV32I-NEXT: lw a3, 36(a0)
-; RV32I-NEXT: .LBB14_2: # %entry
+; RV32I-NEXT: .LBB15_2: # %entry
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
@@ -1063,10 +1143,10 @@ define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) {
; RV64I-LABEL: test_i64_1:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: beqz a1, .LBB14_2
+; RV64I-NEXT: beqz a1, .LBB15_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: ld a2, 32(a0)
-; RV64I-NEXT: .LBB14_2: # %entry
+; RV64I-NEXT: .LBB15_2: # %entry
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1075,14 +1155,14 @@ define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) {
; RV32I-SFB-NEXT: lw a4, 32(a0)
; RV32I-SFB-NEXT: lw a5, 36(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB14_2
+; RV32I-SFB-NEXT: bnez a1, .LBB15_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a4, a2
-; RV32I-SFB-NEXT: .LBB14_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB14_4
+; RV32I-SFB-NEXT: .LBB15_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB15_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: mv a5, a3
-; RV32I-SFB-NEXT: .LBB14_4: # %entry
+; RV32I-SFB-NEXT: .LBB15_4: # %entry
; RV32I-SFB-NEXT: mv a0, a4
; RV32I-SFB-NEXT: mv a1, a5
; RV32I-SFB-NEXT: ret
@@ -1091,35 +1171,35 @@ define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) {
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: ld a0, 32(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB14_2
+; RV64I-SFB-NEXT: bnez a1, .LBB15_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB14_2: # %entry
+; RV64I-SFB-NEXT: .LBB15_2: # %entry
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i64_1:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB14_2
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB15_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: lw a2, 32(a0)
-; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB14_4
+; RV32I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB15_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
; RV32I-SFBILOAD-NEXT: lw a3, 36(a0)
-; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
+; RV32I-SFBILOAD-NEXT: .LBB15_4: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i64_1:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB14_2
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB15_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: ld a2, 32(a0)
+; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
; RV64I-SFBILOAD-NEXT: ret
entry:
 %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4 elements (byte offset 32)
@@ -1135,12 +1215,12 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB15_2
+; RV32I-NEXT: bnez a1, .LBB16_2
; RV32I-NEXT: # %bb.1: # %entry
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB15_2:
+; RV32I-NEXT: .LBB16_2:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
@@ -1149,24 +1229,24 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-NEXT: lb a0, 4(a0)
; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB15_2
+; RV64I-NEXT: bnez a1, .LBB16_2
; RV64I-NEXT: # %bb.1: # %entry
; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB15_2: # %entry
+; RV64I-NEXT: .LBB16_2: # %entry
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lb a0, 4(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB15_2
+; RV32I-SFB-NEXT: beqz a1, .LBB16_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB15_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB15_4
+; RV32I-SFB-NEXT: .LBB16_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB16_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB15_4: # %entry
+; RV32I-SFB-NEXT: .LBB16_4: # %entry
; RV32I-SFB-NEXT: sw a5, 0(a4)
; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a0, a2
@@ -1177,10 +1257,10 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lb a0, 4(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB15_2
+; RV64I-SFB-NEXT: bnez a1, .LBB16_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB15_2: # %entry
+; RV64I-SFB-NEXT: .LBB16_2: # %entry
; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
@@ -1188,14 +1268,14 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB15_2
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB15_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB15_4
+; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB15_4: # %entry
+; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
; RV32I-SFBILOAD-NEXT: mv a0, a2
@@ -1206,10 +1286,10 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB15_2
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
@@ -1228,10 +1308,10 @@ define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB16_2
+; RV32I-NEXT: bnez a1, .LBB17_2
; RV32I-NEXT: # %bb.1: # %entry
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB16_2: # %entry
+; RV32I-NEXT: .LBB17_2: # %entry
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
@@ -1241,24 +1321,24 @@ define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-NEXT: lbu a0, 4(a0)
; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB16_2
+; RV64I-NEXT: bnez a1, .LBB17_2
; RV64I-NEXT: # %bb.1: # %entry
; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB16_2: # %entry
+; RV64I-NEXT: .LBB17_2: # %entry
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lbu a0, 4(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB16_2
+; RV32I-SFB-NEXT: beqz a1, .LBB17_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB16_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB16_4
+; RV32I-SFB-NEXT: .LBB17_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB17_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB16_4: # %entry
+; RV32I-SFB-NEXT: .LBB17_4: # %entry
; RV32I-SFB-NEXT: sw a5, 0(a4)
; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a1, a3
@@ -1268,10 +1348,10 @@ define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lbu a0, 4(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB16_2
+; RV64I-SFB-NEXT: bnez a1, .LBB17_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB16_2: # %entry
+; RV64I-SFB-NEXT: .LBB17_2: # %entry
; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
@@ -1279,14 +1359,14 @@ define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB16_4
+; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB17_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
+; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
; RV32I-SFBILOAD-NEXT: mv a1, a3
@@ -1296,10 +1376,10 @@ define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
@@ -1318,12 +1398,12 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB17_2
+; RV32I-NEXT: bnez a1, .LBB18_2
; RV32I-NEXT: # %bb.1: # %entry
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB17_2:
+; RV32I-NEXT: .LBB18_2:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
@@ -1332,24 +1412,24 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-NEXT: lh a0, 8(a0)
; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB17_2
+; RV64I-NEXT: bnez a1, .LBB18_2
; RV64I-NEXT: # %bb.1: # %entry
; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB17_2: # %entry
+; RV64I-NEXT: .LBB18_2: # %entry
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lh a0, 8(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB17_2
+; RV32I-SFB-NEXT: beqz a1, .LBB18_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB17_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB17_4
+; RV32I-SFB-NEXT: .LBB18_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB18_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB17_4: # %entry
+; RV32I-SFB-NEXT: .LBB18_4: # %entry
; RV32I-SFB-NEXT: sw a5, 0(a4)
; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a0, a2
@@ -1360,10 +1440,10 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lh a0, 8(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB17_2
+; RV64I-SFB-NEXT: bnez a1, .LBB18_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB17_2: # %entry
+; RV64I-SFB-NEXT: .LBB18_2: # %entry
; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
@@ -1371,14 +1451,14 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_4
+; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
+; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
; RV32I-SFBILOAD-NEXT: mv a0, a2
@@ -1389,10 +1469,10 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
@@ -1411,10 +1491,10 @@ define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB18_2
+; RV32I-NEXT: bnez a1, .LBB19_2
; RV32I-NEXT: # %bb.1: # %entry
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB18_2: # %entry
+; RV32I-NEXT: .LBB19_2: # %entry
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
@@ -1424,24 +1504,24 @@ define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-NEXT: lhu a0, 8(a0)
; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB18_2
+; RV64I-NEXT: bnez a1, .LBB19_2
; RV64I-NEXT: # %bb.1: # %entry
; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB18_2: # %entry
+; RV64I-NEXT: .LBB19_2: # %entry
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lhu a0, 8(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB18_2
+; RV32I-SFB-NEXT: beqz a1, .LBB19_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB18_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB18_4
+; RV32I-SFB-NEXT: .LBB19_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB19_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB18_4: # %entry
+; RV32I-SFB-NEXT: .LBB19_4: # %entry
; RV32I-SFB-NEXT: sw a5, 0(a4)
; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a1, a3
@@ -1451,10 +1531,10 @@ define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lhu a0, 8(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB18_2
+; RV64I-SFB-NEXT: bnez a1, .LBB19_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB18_2: # %entry
+; RV64I-SFB-NEXT: .LBB19_2: # %entry
; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
@@ -1462,14 +1542,14 @@ define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB19_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB18_4
+; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
+; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
; RV32I-SFBILOAD-NEXT: mv a1, a3
@@ -1479,10 +1559,10 @@ define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
@@ -1494,6 +1574,96 @@ entry:
ret i64 %res
}
+define i64 @test_i32_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+; RV32I-LABEL: test_i32_z_store_64:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB20_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB20_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_store_64:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lwu a0, 16(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB20_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB20_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_store_64:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB20_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB20_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB20_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB20_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_store_64:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lwu a0, 16(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB20_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB20_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_store_64:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB20_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB20_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB20_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB20_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_store_64:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lwu a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB20_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB20_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4 elements (byte offset 16)
+ %val = load i32, ptr %addr ; load 32-bit value
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-LABEL: test_i64_store_64:
; RV32I: # %bb.0: # %entry
@@ -1504,11 +1674,11 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: andi a7, a7, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a7, .LBB19_2
+; RV32I-NEXT: bnez a7, .LBB21_2
; RV32I-NEXT: # %bb.1: # %entry
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: .LBB19_2: # %entry
+; RV32I-NEXT: .LBB21_2: # %entry
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i64_store_64:
@@ -1516,10 +1686,10 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-NEXT: ld a0, 32(a0)
; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB19_2
+; RV64I-NEXT: bnez a1, .LBB21_2
; RV64I-NEXT: # %bb.1: # %entry
; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB19_2: # %entry
+; RV64I-NEXT: .LBB21_2: # %entry
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i64_store_64:
@@ -1527,14 +1697,14 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFB-NEXT: lw a7, 32(a0)
; RV32I-SFB-NEXT: lw t0, 36(a0)
; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB19_2
+; RV32I-SFB-NEXT: bnez a1, .LBB21_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a7, a2
-; RV32I-SFB-NEXT: .LBB19_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB19_4
+; RV32I-SFB-NEXT: .LBB21_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB21_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: mv t0, a3
-; RV32I-SFB-NEXT: .LBB19_4: # %entry
+; RV32I-SFB-NEXT: .LBB21_4: # %entry
; RV32I-SFB-NEXT: sw a5, 0(a4)
; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a0, a7
@@ -1545,10 +1715,10 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: ld a0, 32(a0)
; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB19_2
+; RV64I-SFB-NEXT: bnez a1, .LBB21_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB19_2: # %entry
+; RV64I-SFB-NEXT: .LBB21_2: # %entry
; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
@@ -1557,14 +1727,14 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD-NEXT: lw a7, 32(a0)
; RV32I-SFBILOAD-NEXT: lw t0, 36(a0)
; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_2
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB21_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a7, a2
-; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_4
+; RV32I-SFBILOAD-NEXT: .LBB21_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB21_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
; RV32I-SFBILOAD-NEXT: mv t0, a3
-; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
+; RV32I-SFBILOAD-NEXT: .LBB21_4: # %entry
; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
; RV32I-SFBILOAD-NEXT: mv a0, a7
@@ -1575,10 +1745,10 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB21_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
>From a26924a1dc673fbb1a9809c203a067fa8665e292 Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Thu, 4 Dec 2025 16:47:56 +0530
Subject: [PATCH 06/11] fixup! Change i-load to iload in accordance with other
features
Change-Id: I55317e35262890ee9fe1d814f986a3764e4ec675
---
llvm/lib/Target/RISCV/RISCVFeatures.td | 6 ++++--
llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 2 +-
.../RISCV/short-forward-branch-opt-load-atomic-acquire.ll | 8 ++++----
.../short-forward-branch-opt-load-atomic-monotonic.ll | 8 ++++----
.../RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll | 8 ++++----
.../RISCV/short-forward-branch-opt-load-volatile.ll | 8 ++++----
llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll | 8 ++++----
7 files changed, 25 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 5c8a617932e34..dd7f36136ea4e 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1898,6 +1898,7 @@ def TuneNoDefaultUnroll
// - IALU: RVI Integer instructions, plus ANDN/ORN/XNOR (Zbb/Zbkb)
// - IMinMax: Zbb MIN(U)/MAX(U)
// - IMul: MUL
+// - ILoad: LB(U)/LH(U)/LW(U)/LD
//
// We make the simplifying assumption that any microarches that implement
// any "short forward branches" can do the IALU fusions, and can opt into
@@ -1930,9 +1931,10 @@ def HasShortForwardBranchIMul : Predicate<"Subtarget->hasShortForwardBranchIMul(
def TuneShortForwardBranchILoad
- : SubtargetFeature<"short-forward-branch-i-load", "HasShortForwardBranchILoad",
+ : SubtargetFeature<"short-forward-branch-iload", "HasShortForwardBranchILoad",
"true", "Enable short forward branch optimization for load instructions",
- [TuneShortForwardBranchOpt]>;
+ [TuneShortForwardBranchIALU]>;
+def HasShortForwardBranchILoad : Predicate<"Subtarget->hasShortForwardBranchILoad()">;
// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index c97b60452148e..e83246a82b28e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -178,7 +178,7 @@ def PseudoCCMINU : SFBALU_rr;
let Predicates = [HasShortForwardBranchIMul] in
def PseudoCCMUL : SFBALU_rr;
-let Predicates = [HasShortForwardBranchIMul] in {
+let Predicates = [HasShortForwardBranchILoad] in {
def PseudoCCLB : SFBLoad;
def PseudoCCLH : SFBLoad;
def PseudoCCLW : SFBLoad;
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
index 51f2643c94191..2c4a542553889 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-opt | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-opt | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-i-load | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-i-load | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
define i32 @test_i8_s_3(ptr %base, i1 %x, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
index c2564e6ac654f..781ae15b3f20a 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-opt | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-opt | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-i-load | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-i-load | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
define i32 @test_i8_s_2(ptr %base, i1 %x, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
index 9308fa38d95bd..c558931eb5a48 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-opt | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-opt | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-i-load | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-i-load | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
define i32 @test_i8_s_4(ptr %base, i1 %x, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
index ebdf25c66fd77..37f7a3020b820 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-opt | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-opt | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-i-load | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-i-load | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
define i32 @test_i8_s_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
index 5fc3433458d50..6c500468bb187 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-opt | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-opt | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-i-load | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-i-load | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
define i32 @test_i8_s(ptr %base, i1 %x, i32 %b) {
>From 5f769e99376266dc7506dea5eb495c9e3c322285 Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Fri, 5 Dec 2025 11:46:36 +0530
Subject: [PATCH 07/11] fixup! Use cloneMemRefs correctly and improve tests
Change-Id: I498d6cbdfb6f59aab399d3ee73e2eb0bc0258a55
---
.../Target/RISCV/RISCVExpandPseudoInsts.cpp | 3 +-
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 1 +
...-forward-branch-opt-load-atomic-acquire.ll | 4746 +++-------------
...orward-branch-opt-load-atomic-monotonic.ll | 4628 +++-------------
...-forward-branch-opt-load-atomic-seq_cst.ll | 4860 +++--------------
.../short-forward-branch-opt-load-volatile.ll | 22 +-
.../RISCV/short-forward-branch-opt-load.ll | 44 +-
7 files changed, 2391 insertions(+), 11913 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index a18aad25ae745..55efead1ad887 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -294,8 +294,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
} else {
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
.add(MI.getOperand(5))
- .add(MI.getOperand(6))
- .cloneMemRefs(MI);
+ .add(MI.getOperand(6));
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 68d4c31626ee2..1940d36af4dcd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -967,6 +967,7 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
NewMI.add(LoadMI.getOperand(i));
+ NewMI.cloneMemRefs(LoadMI);
return NewMI;
}
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
index 2c4a542553889..1ba01ac5225d3 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
@@ -1,200 +1,80 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-ialu | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-ialu | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-iload | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
-define i32 @test_i8_s_3(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i8_s_3(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_s_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s1, .LBB0_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB0_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB0_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB0_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB0_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB0_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s1, .LBB0_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB0_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 24
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB0_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s1, .LBB0_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB0_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB0_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB0_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB0_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -204,185 +84,71 @@ entry:
ret i32 %res
}
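The diff hunk header above elides the IR body of test_i8_s_3. For reference only, here is a minimal sketch of the shape this test presumably exercises, reconstructed from the generated lb / fence r, rw / bnez sequence — the load, extension, and select lines are assumptions, not the verbatim test source:

  define i32 @test_i8_s_3(ptr %base, i1 %x, i32 %b) nounwind {
  entry:
    %addr = getelementptr i8, ptr %base, i32 4
    %v = load atomic i8, ptr %addr acquire, align 1  ; with +a this lowers to lb plus fence r, rw
    %s = sext i8 %v to i32                           ; folded into the sign-extending lb
    %res = select i1 %x, i32 %s, i32 %b              ; becomes the short forward branch
    ret i32 %res
  }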
-define i32 @test_i8_z_3(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i8_z_3(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_z_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s1, .LBB1_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB1_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB1_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB1_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB1_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s1, .LBB1_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB1_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: zext.b s0, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB1_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s1, .LBB1_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB1_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB1_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB1_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s0, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB1_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -392,191 +158,71 @@ entry:
ret i32 %res
}
-define i32 @test_i16_s_3(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i16_s_3(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_s_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s1, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB2_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB2_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB2_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB2_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB2_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s1, .LBB2_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB2_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB2_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB2_2
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB2_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB2_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB2_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB2_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4 elements (byte offset 8)
@@ -586,191 +232,71 @@ entry:
ret i32 %res
}
-define i32 @test_i16_z_3(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i16_z_3(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_z_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s1, .LBB3_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB3_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB3_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB3_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB3_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB3_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s1, .LBB3_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB3_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srli s0, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB3_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB3_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB3_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB3_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB3_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srli s0, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB3_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4 elements (byte offset 8)
@@ -780,179 +306,71 @@ entry:
ret i32 %res
}
-define i32 @test_i32_3(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i32_3(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i32_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 16
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_4
-; RV32I-NEXT: bnez s1, .LBB4_2
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB4_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB4_2: # %entry
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 16
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_4
-; RV64I-NEXT: bnez s1, .LBB4_2
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB4_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB4_2: # %entry
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 16
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_4
-; RV32I-SFB-NEXT: bnez s1, .LBB4_2
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB4_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB4_2: # %entry
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 16
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_4
-; RV64I-SFB-NEXT: bnez s1, .LBB4_2
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB4_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB4_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_4
-; RV32I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB4_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_4
-; RV64I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB4_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4 elements (byte offset 16)
@@ -961,257 +379,77 @@ entry:
ret i32 %res
}
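For the i32 case no extension is needed, which is why the RV32I-SFB and RV32I-SFBILOAD outputs above coincide: only the select is conditionalised. A sketch under the same assumptions as before (the elided body is not the verbatim test source):

  define i32 @test_i32_3(ptr %base, i1 %x, i32 %b) nounwind {
  entry:
    %addr = getelementptr i32, ptr %base, i32 4
    %v = load atomic i32, ptr %addr acquire, align 4  ; lw plus fence r, rw under +a
    %res = select i1 %x, i32 %v, i32 %b               ; becomes the short forward branch
    ret i32 %res
  }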
-define i32 @test_i8_s_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i8_s_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_s_store_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB5_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB5_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB5_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_store_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB5_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB5_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB5_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_store_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s3, .LBB5_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB5_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s2, a0, 24
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB5_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_store_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s3, .LBB5_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB5_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB5_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_store_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB5_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s2, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_store_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB5_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -1222,251 +460,77 @@ entry:
ret i32 %res
}
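The _store variants add an unrelated store between the atomic load and the select; the property checked above is that the SFB configurations schedule the sw after the conditional block rather than losing or reordering it. A hypothetical reconstruction (the store's exact position in the source is an assumption):

  define i32 @test_i8_s_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
  entry:
    %addr = getelementptr i8, ptr %base, i32 4
    %v = load atomic i8, ptr %addr acquire, align 1
    store i32 %c, ptr %base1                     ; unrelated store the pass must preserve
    %s = sext i8 %v to i32
    %res = select i1 %x, i32 %s, i32 %b
    ret i32 %res
  }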
-define i32 @test_i8_z_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i8_z_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_z_store_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB6_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB6_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_store_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB6_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB6_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB6_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_store_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s3, .LBB6_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB6_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: zext.b s2, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB6_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_store_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s3, .LBB6_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB6_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB6_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_store_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s2, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_store_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB6_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -1477,257 +541,77 @@ entry:
ret i32 %res
}
-define i32 @test_i16_s_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i16_s_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_s_store_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB7_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB7_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB7_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_store_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB7_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB7_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB7_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_store_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s3, .LBB7_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB7_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s2, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB7_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_store_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB7_2
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB7_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB7_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_store_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB7_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s2, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_store_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB7_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
@@ -1738,257 +622,77 @@ entry:
ret i32 %res
}
-define i32 @test_i16_z_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i16_z_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_z_store_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB8_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB8_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB8_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_store_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB8_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB8_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB8_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_store_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s3, .LBB8_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB8_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srli s2, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB8_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_store_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB8_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB8_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB8_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_store_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srli s2, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_store_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB8_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
@@ -1999,245 +703,77 @@ entry:
ret i32 %res
}
-define i32 @test_i32_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i32_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i32_store_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 16
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_4
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: bnez s3, .LBB9_2
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB9_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB9_2: # %entry
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_store_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 16
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_4
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB9_2
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB9_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB9_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_store_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 16
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_4
-; RV32I-SFB-NEXT: bnez s3, .LBB9_2
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB9_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB9_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_store_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 16
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_4
-; RV64I-SFB-NEXT: bnez s3, .LBB9_2
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB9_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB9_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_store_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_4
-; RV32I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_store_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_4
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4
@@ -2247,218 +783,86 @@ entry:
ret i32 %res
}
-define i64 @test_i8_s_1_3(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i8_s_1_3(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_s_1_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s2, .LBB10_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 24
-; RV32I-NEXT: .LBB10_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB10_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB10_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_1_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB10_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB10_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB10_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_1_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s2, .LBB10_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB10_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s1, a0, 24
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB10_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB10_4
+; RV32I-SFB-NEXT: beqz a1, .LBB10_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB10_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_1_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s1, .LBB10_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB10_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB10_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_1_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s1, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_1_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB10_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB10_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -2468,212 +872,83 @@ entry:
ret i64 %res
}
-define i64 @test_i8_z_1_3(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i8_z_1_3(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_z_1_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s2, .LBB11_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB11_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB11_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_1_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB11_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB11_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB11_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_1_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s2, .LBB11_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB11_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB11_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB11_4
+; RV32I-SFB-NEXT: beqz a1, .LBB11_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: zext.b s1, a0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB11_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_1_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s1, .LBB11_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB11_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB11_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_1_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB11_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB11_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s1, a0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_1_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB11_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB11_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -2683,218 +958,86 @@ entry:
ret i64 %res
}
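; Sign-extending i16 case: with the load inlined, the monotonic atomic load
; lowers to lh plus a trailing "fence r, rw" instead of an __atomic_load_2
; libcall, so the frame setup and callee-saved spills disappear.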
-define i64 @test_i16_s_1_3(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i16_s_1_3(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_s_1_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s2, .LBB12_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 16
-; RV32I-NEXT: .LBB12_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB12_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB12_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_1_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB12_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB12_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB12_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_1_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s2, .LBB12_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB12_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s1, a0, 16
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB12_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB12_4
+; RV32I-SFB-NEXT: beqz a1, .LBB12_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB12_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_1_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB12_2
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB12_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB12_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_1_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s1, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_1_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB12_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB12_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 elements (byte offset 8)
@@ -2904,218 +1047,83 @@ entry:
ret i64 %res
}
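; Zero-extending i16 case: same inline lhu + "fence r, rw" lowering; the
; zeroed upper half is produced by an addi/and mask in the base RV32I output
; and by a conditional "li a3, 0" in the SFB variants.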
-define i64 @test_i16_z_1_3(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i16_z_1_3(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_z_1_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s2, .LBB13_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB13_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB13_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_1_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB13_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB13_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB13_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_1_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s2, .LBB13_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB13_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB13_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB13_4
+; RV32I-SFB-NEXT: beqz a1, .LBB13_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srli s1, a0, 16
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB13_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_1_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB13_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB13_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB13_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_1_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB13_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB13_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srli s1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_1_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB13_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB13_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 elements (byte offset 8)
@@ -3125,19 +1133,14 @@ entry:
ret i64 %res
}
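; Zero-extending i32 case: only the now-redundant CFI directives change once
; the function is marked nounwind; the libcall-based sequence with its frame
; setup and register spills is retained in all configurations.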
-define i64 @test_i32_z_1_3(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i32_z_1_3(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i32_z_1_3:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a3
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: andi s2, a1, 1
@@ -3157,24 +1160,15 @@ define i64 @test_i32_z_1_3(ptr %base, i1 %x, i64 %b) {
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_1_3:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: mv s0, a2
; RV64I-NEXT: andi s1, a1, 1
; RV64I-NEXT: addi a1, a0, 16
@@ -3190,25 +1184,16 @@ define i64 @test_i32_z_1_3(ptr %base, i1 %x, i64 %b) {
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_1_3:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
; RV32I-SFB-NEXT: mv s0, a3
; RV32I-SFB-NEXT: mv s1, a2
; RV32I-SFB-NEXT: andi s2, a1, 1
@@ -3231,24 +1216,15 @@ define i64 @test_i32_z_1_3(ptr %base, i1 %x, i64 %b) {
; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_1_3:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
; RV64I-SFB-NEXT: mv s0, a2
; RV64I-SFB-NEXT: andi s1, a1, 1
; RV64I-SFB-NEXT: addi a1, a0, 16
@@ -3264,25 +1240,16 @@ define i64 @test_i32_z_1_3(ptr %base, i1 %x, i64 %b) {
; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_1_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
; RV32I-SFBILOAD-NEXT: mv s0, a3
; RV32I-SFBILOAD-NEXT: mv s1, a2
; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
@@ -3305,24 +1272,15 @@ define i64 @test_i32_z_1_3(ptr %base, i1 %x, i64 %b) {
; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_1_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
; RV64I-SFBILOAD-NEXT: mv s0, a2
; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
@@ -3338,11 +1296,7 @@ define i64 @test_i32_z_1_3(ptr %base, i1 %x, i64 %b) {
; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4 elements (byte offset 16)
@@ -3352,19 +1306,14 @@ entry:
ret i64 %res
}
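; i64 case: on RV64 the atomic load is inlined as ld + "fence r, rw", while
; the RV32 variants keep the __atomic_load_8 libcall (and hence their frame
; setup), since RV32 has no native 64-bit atomic load.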
-define i64 @test_i64_1_3(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i64_1_3(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i64_1_3:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a3
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: andi s2, a1, 1
@@ -3380,55 +1329,27 @@ define i64 @test_i64_1_3(ptr %base, i1 %x, i64 %b) {
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i64_1_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 32
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_8
-; RV64I-NEXT: bnez s1, .LBB15_2
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB15_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB15_2: # %entry
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i64_1_3:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
; RV32I-SFB-NEXT: mv s0, a3
; RV32I-SFB-NEXT: mv s1, a2
; RV32I-SFB-NEXT: andi s2, a1, 1
@@ -3447,55 +1368,27 @@ define i64 @test_i64_1_3(ptr %base, i1 %x, i64 %b) {
; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i64_1_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 32
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_8
-; RV64I-SFB-NEXT: bnez s1, .LBB15_2
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB15_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB15_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i64_1_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
; RV32I-SFBILOAD-NEXT: mv s0, a3
; RV32I-SFBILOAD-NEXT: mv s1, a2
; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
@@ -3514,41 +1407,18 @@ define i64 @test_i64_1_3(ptr %base, i1 %x, i64 %b) {
; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i64_1_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_8
-; RV64I-SFBILOAD-NEXT: bnez s1, .LBB15_2
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB15_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4 elements (byte offset 32)
@@ -3557,302 +1427,95 @@ entry:
ret i64 %res
}
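; Sign-extending i8 load combined with a 64-bit store through %base1: the
; load becomes an inline lb + "fence r, rw", and the store operands now stay
; in argument registers instead of being shuffled through saved registers.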
-define i64 @test_i8_s_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i8_s_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_s_store_64_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB16_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 24
-; RV32I-NEXT: .LBB16_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB16_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB16_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_store_64_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB16_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB16_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB16_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_store_64_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s5, .LBB16_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB16_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s4, a0, 24
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB16_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB16_4
+; RV32I-SFB-NEXT: beqz a1, .LBB16_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB16_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_store_64_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s3, .LBB16_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB16_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB16_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s4, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB16_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -3863,296 +1526,92 @@ entry:
ret i64 %res
}
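; Zero-extending variant of the preceding test: inline lbu + "fence r, rw",
; with the zeroed upper word expressed as an addi/and mask in the base RV32I
; code and as a conditional "li a3, 0" in the SFB outputs.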
-define i64 @test_i8_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i8_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_z_store_64_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB17_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB17_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB17_2: # %entry
-; RV32I-NEXT: addi a1, s5, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_store_64_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB17_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB17_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB17_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_store_64_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s5, .LBB17_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB17_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB17_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB17_4
+; RV32I-SFB-NEXT: bnez a1, .LBB17_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: zext.b s4, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB17_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_store_64_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s3, .LBB17_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB17_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB17_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB17_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s4, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB17_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -4163,302 +1622,95 @@ entry:
ret i64 %res
}
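
A note on the shape of these tests, since the diff elides the IR bodies: the
pattern being exercised is an atomic load whose result feeds a select, plus an
unrelated store. The sketch below is reconstructed from the visible codegen,
not copied from the test file, so the acquire ordering and operand order are
assumptions inferred from the emitted `lbu` followed by `fence r, rw`:

; Hedged reconstruction of the i8 zero-extend variant (not the verbatim test).
define i64 @sketch_i8_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
entry:
  %addr = getelementptr i8, ptr %base, i64 4         ; compute base + 4
  %v = load atomic i8, ptr %addr acquire, align 1    ; lbu + fence r, rw
  %ext = zext i8 %v to i64
  store i64 %c, ptr %base1                           ; the unconditional sw/sd
  %res = select i1 %x, i64 %ext, i64 %b              ; becomes the forward branch
  ret i64 %res
}

Once the libcall is gone, no value lives across a call, so the callee-saved
spills disappear, and the new `nounwind` attribute drops the `.cfi_*`
directives with them.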
-define i64 @test_i16_s_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i16_s_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_s_store_64_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB18_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 16
-; RV32I-NEXT: .LBB18_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB18_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB18_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_store_64_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB18_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB18_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB18_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_store_64_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s5, .LBB18_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB18_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s4, a0, 16
-; RV32I-SFB-NEXT: .LBB18_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB18_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s3, a0, 31
-; RV32I-SFB-NEXT: .LBB18_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_store_64_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB18_2
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB18_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB18_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB18_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB18_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB18_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s4, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB18_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
@@ -4469,302 +1721,92 @@ entry:
ret i64 %res
}
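
The `_s_` variants differ only in the extension. A sketch of the i16 case,
with the same caveats as above. On RV32 the i64 result is a register pair, so
the select lowers to two short forward branches: one guarding the `lh` result
(the low word) and one guarding `srai a0, 31` (the high word).

define i64 @sketch_i16_s_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
entry:
  %addr = getelementptr i16, ptr %base, i64 4        ; element 4, byte offset 8
  %v = load atomic i16, ptr %addr acquire, align 2   ; lh + fence r, rw
  %ext = sext i16 %v to i64                          ; lh already sign-extends
  store i64 %c, ptr %base1
  %res = select i1 %x, i64 %ext, i64 %b
  ret i64 %res
}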
-define i64 @test_i16_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i16_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_z_store_64_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB19_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB19_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB19_2: # %entry
-; RV32I-NEXT: addi a1, s5, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_store_64_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB19_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB19_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB19_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_store_64_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s5, .LBB19_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB19_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB19_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB19_4
+; RV32I-SFB-NEXT: bnez a1, .LBB19_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srli s4, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB19_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_store_64_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB19_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB19_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB19_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB19_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srli s4, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB19_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
@@ -4775,11 +1817,10 @@ entry:
ret i64 %res
}
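
Worth noting in the plain RV32I output above: the high word of the
zero-extended result is computed without a branch. `addi a1, a1, -1` turns the
i1 condition into the mask 0 or -1, and `and a1, a1, a3` then yields
`x ? 0 : hi(%b)`, which is exactly the high word of `zext i16` when x is set.
As a standalone sketch of that identity (illustrative names, not from the
test):

define i32 @sketch_hi_word_mask(i1 %x, i32 %hi_b) nounwind {
entry:
  %z = zext i1 %x to i32
  %m = add i32 %z, -1        ; x ? 0 : -1, the addi a1, a1, -1
  %hi = and i32 %m, %hi_b    ; x ? 0 : hi(%b), the and a1, a1, a3
  ret i32 %hi
}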
-define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i32_z_store_64_3:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -4787,13 +1828,6 @@ define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
; RV32I-NEXT: mv s2, a6
; RV32I-NEXT: mv s3, a5
; RV32I-NEXT: mv s4, a4
@@ -4821,31 +1855,17 @@ define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_store_64_3:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
; RV64I-NEXT: mv s1, a4
; RV64I-NEXT: mv s2, a3
; RV64I-NEXT: mv s0, a2
@@ -4866,19 +1886,12 @@ define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_store_64_3:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -4886,13 +1899,6 @@ define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
; RV32I-SFB-NEXT: mv s0, a6
; RV32I-SFB-NEXT: mv s1, a5
; RV32I-SFB-NEXT: mv s2, a4
@@ -4923,31 +1929,17 @@ define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_store_64_3:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
; RV64I-SFB-NEXT: mv s0, a4
; RV64I-SFB-NEXT: mv s1, a3
; RV64I-SFB-NEXT: mv s2, a2
@@ -4968,19 +1960,12 @@ define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -4988,13 +1973,6 @@ define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
; RV32I-SFBILOAD-NEXT: mv s0, a6
; RV32I-SFBILOAD-NEXT: mv s1, a5
; RV32I-SFBILOAD-NEXT: mv s2, a4
@@ -5025,31 +2003,17 @@ define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
; RV64I-SFBILOAD-NEXT: mv s0, a4
; RV64I-SFBILOAD-NEXT: mv s1, a3
; RV64I-SFBILOAD-NEXT: mv s2, a2
@@ -5070,13 +2034,7 @@ define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
@@ -5087,11 +2045,10 @@ entry:
ret i64 %res
}
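
Unlike its neighbours, the visible change in this test is only the frame
cleanup: the lines shown as removed are all `.cfi_*` directives, and the rest
of the body appears unchanged in the retained context. That is the effect of
the added `nounwind`: without it, llc has to describe the frame for unwinding
even when nothing can throw. A minimal reproducer sketch (assumed external
callee, not from the test):

declare void @ext()

define void @sketch_with_cfi() {             ; emits .cfi_def_cfa_offset etc.
  call void @ext()
  ret void
}

define void @sketch_without_cfi() nounwind { ; same frame, no .cfi_* output
  call void @ext()
  ret void
}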
-define i64 @test_i64_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i64_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i64_store_64_3:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -5099,13 +2056,6 @@ define i64 @test_i64_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
; RV32I-NEXT: mv s2, a6
; RV32I-NEXT: mv s3, a5
; RV32I-NEXT: mv s4, a4
@@ -5129,61 +2079,24 @@ define i64 @test_i64_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i64_store_64_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 32
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: call __atomic_load_8
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB21_2
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB21_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB21_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i64_store_64_3:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -5191,13 +2104,6 @@ define i64 @test_i64_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
; RV32I-SFB-NEXT: mv s0, a6
; RV32I-SFB-NEXT: mv s1, a5
; RV32I-SFB-NEXT: mv s2, a4
@@ -5224,61 +2130,24 @@ define i64 @test_i64_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i64_store_64_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 32
-; RV64I-SFB-NEXT: li a1, 2
-; RV64I-SFB-NEXT: call __atomic_load_8
-; RV64I-SFB-NEXT: bnez s3, .LBB21_2
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB21_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB21_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i64_store_64_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -5286,13 +2155,6 @@ define i64 @test_i64_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
; RV32I-SFBILOAD-NEXT: mv s0, a6
; RV32I-SFBILOAD-NEXT: mv s1, a5
; RV32I-SFBILOAD-NEXT: mv s2, a4
@@ -5319,55 +2181,19 @@ define i64 @test_i64_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i64_store_64_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV64I-SFBILOAD-NEXT: li a1, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load_8
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB21_2
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB21_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
index 781ae15b3f20a..9d3606dca49a8 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
@@ -1,200 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-ialu | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-ialu | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-iload | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
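
The RUN-line change here is what enables the rest of this hunk: with `+a`,
monotonic atomic loads are legal as plain `lb`/`lh`/`lw` instructions instead
of `__atomic_load_N` libcalls, and only a real load instruction can be folded
into a short forward branch. The assertions were then regenerated; something
like the following should reproduce them (run from the llvm source root with a
built llc on PATH):

  llvm/utils/update_llc_test_checks.py \
      llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll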
-define i32 @test_i8_s_2(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i8_s_2(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_s_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s1, .LBB0_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB0_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB0_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB0_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB0_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB0_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s1, .LBB0_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB0_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 24
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB0_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s1, .LBB0_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB0_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB0_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB0_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB0_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -204,185 +78,65 @@ entry:
ret i32 %res
}
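
Every `test_*_2` function in this file has the same shape: a fixed-offset load whose sign- or zero-extended result is selected against `%b` on the condition `%x`, which is what the short forward branch over the `mv` in the checks above implements. The full IR bodies are elided by the hunk headers, so the following is only a hedged sketch of the presumed i8 sign-extending case, under the hypothetical name `@sketch_i8_s`:

define i32 @sketch_i8_s(ptr %base, i1 %x, i32 %b) nounwind {
entry:
  %addr = getelementptr i8, ptr %base, i32 4  ; byte offset 4, matching the "lb a0, 4(a0)" checks
  %v = load i8, ptr %addr                     ; the load that the checks above select over
  %s = sext i8 %v to i32                      ; folded into lb on the sign-extending paths
  %res = select i1 %x, i32 %s, i32 %b         ; lowered as andi/bnez plus a conditionally skipped mv
  ret i32 %res
}

The `_z_` variants presumably differ only in using `zext` instead of `sext` (hence `lbu`/`lhu` in their checks), and the i16/i32 variants in the element type.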
-define i32 @test_i8_z_2(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i8_z_2(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_z_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s1, .LBB1_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB1_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB1_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB1_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB1_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s1, .LBB1_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB1_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: zext.b s0, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB1_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s1, .LBB1_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB1_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB1_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB1_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s0, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB1_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -392,191 +146,65 @@ entry:
ret i32 %res
}
-define i32 @test_i16_s_2(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i16_s_2(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_s_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s1, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB2_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB2_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB2_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB2_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB2_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s1, .LBB2_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB2_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB2_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB2_2
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB2_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB2_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB2_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB2_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute element 4, i.e. base + 8 bytes
@@ -586,191 +214,65 @@ entry:
ret i32 %res
}
-define i32 @test_i16_z_2(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i16_z_2(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_z_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s1, .LBB3_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB3_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB3_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB3_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB3_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB3_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s1, .LBB3_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB3_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srli s0, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB3_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB3_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB3_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB3_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB3_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srli s0, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB3_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute element 4, i.e. base + 8 bytes
@@ -780,179 +282,65 @@ entry:
ret i32 %res
}
-define i32 @test_i32_2(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i32_2(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i32_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 16
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_4
-; RV32I-NEXT: bnez s1, .LBB4_2
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB4_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB4_2: # %entry
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 16
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_4
-; RV64I-NEXT: bnez s1, .LBB4_2
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB4_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB4_2: # %entry
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 16
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_4
-; RV32I-SFB-NEXT: bnez s1, .LBB4_2
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB4_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB4_2: # %entry
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 16
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_4
-; RV64I-SFB-NEXT: bnez s1, .LBB4_2
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB4_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB4_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_4
-; RV32I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB4_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_4
-; RV64I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB4_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i32 4 ; compute element 4, i.e. base + 16 bytes
@@ -961,257 +349,71 @@ entry:
ret i32 %res
}
-define i32 @test_i8_s_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i8_s_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_s_store_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB5_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB5_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB5_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_store_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB5_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB5_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB5_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_store_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s3, .LBB5_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB5_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s2, a0, 24
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB5_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_store_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s3, .LBB5_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB5_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB5_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_store_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB5_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s2, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_store_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB5_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -1222,251 +424,71 @@ entry:
ret i32 %res
}
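
The `_store_` variants add an unconditional store through a second pointer so the checks can observe how it schedules around the branch: in the plain RV32I/RV64I output above the `sw a4, 0(a3)` issues before the branch, while the SFB outputs sink it below the conditional `mv`. Again a hedged sketch of the elided body, under a hypothetical name:

define i32 @sketch_i8_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
entry:
  %addr = getelementptr i8, ptr %base, i32 4  ; byte offset 4
  %v = load i8, ptr %addr
  %s = sext i8 %v to i32
  store i32 %c, ptr %base1                    ; the unconditional "sw a4, 0(a3)" in the checks
  %res = select i1 %x, i32 %s, i32 %b         ; the short-forward-branch select
  ret i32 %res
}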
-define i32 @test_i8_z_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i8_z_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_z_store_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB6_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB6_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_store_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB6_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB6_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB6_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_store_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s3, .LBB6_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB6_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: zext.b s2, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB6_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_store_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s3, .LBB6_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB6_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB6_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_store_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s2, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_store_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB6_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -1477,257 +499,71 @@ entry:
ret i32 %res
}
-define i32 @test_i16_s_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i16_s_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_s_store_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB7_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB7_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB7_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_store_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB7_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB7_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB7_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_store_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s3, .LBB7_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB7_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s2, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB7_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_store_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB7_2
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB7_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB7_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_store_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB7_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s2, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_store_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB7_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
@@ -1738,257 +574,71 @@ entry:
ret i32 %res
}
-define i32 @test_i16_z_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i16_z_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_z_store_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB8_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB8_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB8_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_store_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB8_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB8_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB8_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_store_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s3, .LBB8_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB8_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srli s2, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB8_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_store_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB8_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB8_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB8_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_store_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srli s2, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_store_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB8_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
@@ -1999,245 +649,71 @@ entry:
ret i32 %res
}
-define i32 @test_i32_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i32_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i32_store_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 16
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_4
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: bnez s3, .LBB9_2
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB9_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB9_2: # %entry
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_store_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 16
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_4
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB9_2
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB9_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB9_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_store_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 16
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_4
-; RV32I-SFB-NEXT: bnez s3, .LBB9_2
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB9_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB9_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_store_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 16
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_4
-; RV64I-SFB-NEXT: bnez s3, .LBB9_2
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB9_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB9_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_store_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_4
-; RV32I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_store_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_4
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4
@@ -2247,218 +723,80 @@ entry:
ret i32 %res
}
-define i64 @test_i8_s_1_2(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i8_s_1_2(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_s_1_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s2, .LBB10_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 24
-; RV32I-NEXT: .LBB10_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB10_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB10_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB10_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB10_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB10_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_1_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s2, .LBB10_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB10_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s1, a0, 24
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB10_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB10_4
+; RV32I-SFB-NEXT: beqz a1, .LBB10_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB10_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s1, .LBB10_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB10_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB10_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s1, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB10_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB10_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -2468,212 +806,77 @@ entry:
ret i64 %res
}
-define i64 @test_i8_z_1_2(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i8_z_1_2(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_z_1_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s2, .LBB11_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB11_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB11_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB11_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB11_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB11_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_1_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s2, .LBB11_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB11_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB11_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB11_4
+; RV32I-SFB-NEXT: beqz a1, .LBB11_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: zext.b s1, a0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB11_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s1, .LBB11_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB11_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB11_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB11_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB11_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s1, a0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB11_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB11_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -2683,218 +886,80 @@ entry:
ret i64 %res
}
-define i64 @test_i16_s_1_2(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i16_s_1_2(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_s_1_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s2, .LBB12_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 16
-; RV32I-NEXT: .LBB12_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB12_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB12_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB12_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB12_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB12_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_1_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s2, .LBB12_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB12_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s1, a0, 16
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB12_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB12_4
+; RV32I-SFB-NEXT: beqz a1, .LBB12_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB12_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB12_2
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB12_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB12_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s1, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB12_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB12_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 elements (byte offset 8)
@@ -2904,218 +969,77 @@ entry:
ret i64 %res
}
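The pattern in these hunks is uniform: once the i16 load is emitted inline as `lh` instead of going through the `__atomic_load_2` libcall, the spill/reload prologue and the `.cfi_*` bookkeeping vanish (the functions are now `nounwind`), and the select collapses into a short forward branch over one or two `mv`/`srai` instructions. The diff context elides the load/select IR, so the following reconstruction of the `test_i16_s_*` pattern is only a sketch: the function name is a placeholder, and the monotonic ordering is inferred from the memorder-0 argument (`li a1, 0`) of the old libcall.

  define i64 @sfb_lh_sext(ptr %base, i1 %x, i64 %b) nounwind {
  entry:
    %addr = getelementptr i16, ptr %base, i64 4        ; byte offset 8, matching "lh a0, 8(a0)"
    %v = load atomic i16, ptr %addr monotonic, align 2 ; previously lowered to __atomic_load_2
    %s = sext i16 %v to i64
    %res = select i1 %x, i64 %s, i64 %b
    ret i64 %res
  }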
-define i64 @test_i16_z_1_2(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i16_z_1_2(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_z_1_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s2, .LBB13_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: bnez a1, .LBB13_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB13_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB13_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB13_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB13_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_1_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s2, .LBB13_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB13_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB13_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB13_4
+; RV32I-SFB-NEXT: beqz a1, .LBB13_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srli s1, a0, 16
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB13_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB13_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB13_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB13_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB13_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB13_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srli s1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB13_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB13_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 elements (byte offset 8)
@@ -3125,19 +1049,14 @@ entry:
ret i64 %res
}
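The `_z_` variants swap the sign extension for a zero extension, so the inline load becomes `lhu`, and the RV32 high word is produced without a second branch: after `andi a1, a1, 1`, the tail `addi a1, a1, -1; and a1, a1, a3` yields 0 when %x is set (1 - 1 = 0 masks everything out) and all-ones AND a3 = a3, the high word of %b, when it is clear. A sketch of the corresponding IR, with the same caveats as above (placeholder name, load/select lines reconstructed from the checked assembly):

  define i64 @sfb_lhu_zext(ptr %base, i1 %x, i64 %b) nounwind {
  entry:
    %addr = getelementptr i16, ptr %base, i64 4
    %v = load atomic i16, ptr %addr monotonic, align 2
    %z = zext i16 %v to i64
    %res = select i1 %x, i64 %z, i64 %b
    ret i64 %res
  }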
-define i64 @test_i32_z_1_2(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i32_z_1_2(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i32_z_1_2:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a3
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: andi s2, a1, 1
@@ -3157,24 +1076,15 @@ define i64 @test_i32_z_1_2(ptr %base, i1 %x, i64 %b) {
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_1_2:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: mv s0, a2
; RV64I-NEXT: andi s1, a1, 1
; RV64I-NEXT: addi a1, a0, 16
@@ -3190,25 +1100,16 @@ define i64 @test_i32_z_1_2(ptr %base, i1 %x, i64 %b) {
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_1_2:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
; RV32I-SFB-NEXT: mv s0, a3
; RV32I-SFB-NEXT: mv s1, a2
; RV32I-SFB-NEXT: andi s2, a1, 1
@@ -3231,24 +1132,15 @@ define i64 @test_i32_z_1_2(ptr %base, i1 %x, i64 %b) {
; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_1_2:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
; RV64I-SFB-NEXT: mv s0, a2
; RV64I-SFB-NEXT: andi s1, a1, 1
; RV64I-SFB-NEXT: addi a1, a0, 16
@@ -3264,25 +1156,16 @@ define i64 @test_i32_z_1_2(ptr %base, i1 %x, i64 %b) {
; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
; RV32I-SFBILOAD-NEXT: mv s0, a3
; RV32I-SFBILOAD-NEXT: mv s1, a2
; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
@@ -3305,24 +1188,15 @@ define i64 @test_i32_z_1_2(ptr %base, i1 %x, i64 %b) {
; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
; RV64I-SFBILOAD-NEXT: mv s0, a2
; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
@@ -3338,11 +1212,7 @@ define i64 @test_i32_z_1_2(ptr %base, i1 %x, i64 %b) {
; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4 elements (byte offset 16)
@@ -3352,19 +1222,14 @@ entry:
ret i64 %res
}
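test_i32_z_1_2 is the outlier in this excerpt: all five configurations still reach the atomic-load libcall, so the only change here is the disappearance of the `.cfi_*` directives, a direct consequence of the added `nounwind` attribute. The pattern it exercises is presumably the i32 analogue of the sketches above (again a reconstruction, not a quote from the test):

  define i64 @sfb_lwu_zext(ptr %base, i1 %x, i64 %b) nounwind {
  entry:
    %addr = getelementptr i32, ptr %base, i64 4        ; byte offset 16
    %v = load atomic i32, ptr %addr monotonic, align 4
    %z = zext i32 %v to i64
    %res = select i1 %x, i64 %z, i64 %b
    ret i64 %res
  }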
-define i64 @test_i64_1_2(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i64_1_2(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i64_1_2:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a3
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: andi s2, a1, 1
@@ -3380,55 +1245,26 @@ define i64 @test_i64_1_2(ptr %base, i1 %x, i64 %b) {
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i64_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 32
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_8
-; RV64I-NEXT: bnez s1, .LBB15_2
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: bnez a1, .LBB15_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB15_2: # %entry
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i64_1_2:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
; RV32I-SFB-NEXT: mv s0, a3
; RV32I-SFB-NEXT: mv s1, a2
; RV32I-SFB-NEXT: andi s2, a1, 1
@@ -3447,55 +1283,26 @@ define i64 @test_i64_1_2(ptr %base, i1 %x, i64 %b) {
; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i64_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 32
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_8
-; RV64I-SFB-NEXT: bnez s1, .LBB15_2
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB15_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB15_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i64_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
; RV32I-SFBILOAD-NEXT: mv s0, a3
; RV32I-SFBILOAD-NEXT: mv s1, a2
; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
@@ -3514,41 +1321,17 @@ define i64 @test_i64_1_2(ptr %base, i1 %x, i64 %b) {
; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i64_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_8
-; RV64I-SFBILOAD-NEXT: bnez s1, .LBB15_2
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB15_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4 elements (byte offset 32)
@@ -3557,302 +1340,89 @@ entry:
ret i64 %res
}
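For the i64 case the split is by XLEN: the RV64 configurations now fold the load into a plain `ld a0, 32(a0)` followed by the short forward branch, while the RV32 configurations keep their frames and, judging by the retained spill/reload sequences, still call `__atomic_load_8`, since a 64-bit atomic load has no single-instruction lowering on RV32. A sketch of the pattern, under the same naming assumptions:

  define i64 @sfb_ld(ptr %base, i1 %x, i64 %b) nounwind {
  entry:
    %addr = getelementptr i64, ptr %base, i64 4        ; byte offset 32
    %v = load atomic i64, ptr %addr monotonic, align 8
    %res = select i1 %x, i64 %v, i64 %b
    ret i64 %res
  }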
-define i64 @test_i8_s_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i8_s_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_s_store_64_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB16_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 24
-; RV32I-NEXT: .LBB16_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB16_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB16_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB16_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB16_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB16_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s5, .LBB16_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB16_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s4, a0, 24
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB16_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB16_4
+; RV32I-SFB-NEXT: beqz a1, .LBB16_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB16_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s3, .LBB16_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB16_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB16_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s4, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB16_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -3863,296 +1433,86 @@ entry:
ret i64 %res
}
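The `_store_64` variants add an unrelated 64-bit store through %base1 to check that it neither blocks the fold nor gets mispredicated: in the new output the store (`sw a5`/`sw a6` on RV32, `sd a4` on RV64) executes unconditionally, before the branch in the straight-line versions and after the conditional blocks in the SFB versions. A reconstructed pattern, with placeholder names and the store position assumed:

  define i64 @sfb_lb_sext_store(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
  entry:
    %addr = getelementptr i8, ptr %base, i64 4
    %v = load atomic i8, ptr %addr monotonic, align 1
    store i64 %c, ptr %base1
    %s = sext i8 %v to i64
    %res = select i1 %x, i64 %s, i64 %b
    ret i64 %res
  }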
-define i64 @test_i8_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i8_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_z_store_64_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB17_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB17_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB17_2: # %entry
-; RV32I-NEXT: addi a1, s5, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB17_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB17_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB17_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s5, .LBB17_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB17_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB17_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB17_4
+; RV32I-SFB-NEXT: bnez a1, .LBB17_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: zext.b s4, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB17_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s3, .LBB17_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB17_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB17_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB17_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s4, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB17_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -4163,302 +1523,89 @@ entry:
ret i64 %res
}
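One detail worth noticing in the `_z_store` SFB output above: the two conditional blocks use opposite branch senses (`beqz` then `bnez`), each overwriting a different half of the result, so neither block needs both halves. For the RV32 body (a1 = %x & 1, a2/a3 = the two halves of %b, a0 = the `lbu` result), the outcomes work out as:

  a1 | low word (a0)        | high word (a3 -> a1)
   1 | lbu result (kept)    | 0  (li a3, 0 taken)
   0 | a2, low half of %b   | a3, high half of %b

The same table describes the earlier `_z_` variants; only the interleaved `sw`/`sd` of %c differs.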
-define i64 @test_i16_s_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i16_s_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_s_store_64_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB18_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 16
-; RV32I-NEXT: .LBB18_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB18_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB18_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB18_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB18_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB18_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s5, .LBB18_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB18_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s4, a0, 16
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB18_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB18_4
+; RV32I-SFB-NEXT: beqz a1, .LBB18_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s3, a0, 31
-; RV32I-SFB-NEXT: .LBB18_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_store_64_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB18_2
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB18_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB18_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB18_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s4, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB18_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
@@ -4469,302 +1616,86 @@ entry:
ret i64 %res
}
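
Note: an i64 select on RV32 occupies a register pair, so the SFB expansion above emits one short forward branch per half rather than a single conditional move. Condensed from the RV32I-SFB checks:

  beqz a1, .LBB18_2
  mv   a2, a0          ; %x set: the sign-extended lh result becomes the low word
.LBB18_2:
  beqz a1, .LBB18_4
  srai a3, a0, 31      ; %x set: the high word is the sign bits of the load
.LBB18_4:

When %x is clear, both branches are taken and a2/a3 keep the two halves of %b.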
-define i64 @test_i16_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i16_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_z_store_64_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB19_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB19_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB19_2: # %entry
-; RV32I-NEXT: addi a1, s5, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB19_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB19_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB19_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s5, .LBB19_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB19_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB19_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB19_4
+; RV32I-SFB-NEXT: bnez a1, .LBB19_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srli s4, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB19_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB19_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB19_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB19_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB19_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srli s4, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB19_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
@@ -4775,11 +1706,10 @@ entry:
ret i64 %res
}
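
Note: the plain RV32I lowering above needs no second branch for the high word in the zext case. After `andi a1, a1, 1` the condition is 0 or 1, so `addi a1, a1, -1` yields an all-ones mask exactly when the condition is clear, and the AND selects either the high word of %b or zero, zero being the correct high word of the zero-extended lhu result:

  andi a1, a1, 1       ; a1 = %x ? 1 : 0
  addi a1, a1, -1      ; a1 = %x ? 0 : 0xffffffff
  and  a1, a1, a3      ; a1 = %x ? 0 : hi(%b)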
-define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i32_z_store_64_2:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -4787,13 +1717,6 @@ define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
; RV32I-NEXT: mv s2, a6
; RV32I-NEXT: mv s3, a5
; RV32I-NEXT: mv s4, a4
@@ -4821,31 +1744,17 @@ define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_store_64_2:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
; RV64I-NEXT: mv s1, a4
; RV64I-NEXT: mv s2, a3
; RV64I-NEXT: mv s0, a2
@@ -4866,19 +1775,12 @@ define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -4886,13 +1788,6 @@ define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
; RV32I-SFB-NEXT: mv s0, a6
; RV32I-SFB-NEXT: mv s1, a5
; RV32I-SFB-NEXT: mv s2, a4
@@ -4923,31 +1818,17 @@ define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
; RV64I-SFB-NEXT: mv s0, a4
; RV64I-SFB-NEXT: mv s1, a3
; RV64I-SFB-NEXT: mv s2, a2
@@ -4968,19 +1849,12 @@ define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -4988,13 +1862,6 @@ define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
; RV32I-SFBILOAD-NEXT: mv s0, a6
; RV32I-SFBILOAD-NEXT: mv s1, a5
; RV32I-SFBILOAD-NEXT: mv s2, a4
@@ -5025,31 +1892,17 @@ define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
; RV64I-SFBILOAD-NEXT: mv s0, a4
; RV64I-SFBILOAD-NEXT: mv s1, a3
; RV64I-SFBILOAD-NEXT: mv s2, a2
@@ -5070,13 +1923,7 @@ define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
@@ -5087,11 +1934,10 @@ entry:
ret i64 %res
}
-define i64 @test_i64_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i64_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i64_store_64_2:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -5099,13 +1945,6 @@ define i64 @test_i64_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
; RV32I-NEXT: mv s2, a6
; RV32I-NEXT: mv s3, a5
; RV32I-NEXT: mv s4, a4
@@ -5129,61 +1968,23 @@ define i64 @test_i64_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i64_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 32
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_8
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB21_2
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB21_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB21_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i64_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -5191,13 +1992,6 @@ define i64 @test_i64_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
; RV32I-SFB-NEXT: mv s0, a6
; RV32I-SFB-NEXT: mv s1, a5
; RV32I-SFB-NEXT: mv s2, a4
@@ -5224,61 +2018,23 @@ define i64 @test_i64_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i64_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 32
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_8
-; RV64I-SFB-NEXT: bnez s3, .LBB21_2
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB21_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB21_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i64_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -5286,13 +2042,6 @@ define i64 @test_i64_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
; RV32I-SFBILOAD-NEXT: mv s0, a6
; RV32I-SFBILOAD-NEXT: mv s1, a5
; RV32I-SFBILOAD-NEXT: mv s2, a4
@@ -5319,55 +2068,18 @@ define i64 @test_i64_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i64_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_8
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB21_2
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB21_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4
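
Note the asymmetry in this last function: on RV64 the monotonic atomic i64 load becomes a single `ld`, so the short-forward-branch form applies, while the RV32 bodies keep their spill/reload frames. RV32 with only +a has no 64-bit atomic load instruction, so the access presumably still goes through the `__atomic_load_8` libcall and the transform cannot fire:

  RV64 (+a):  ld a0, 32(a0)          ; single instruction, SFB applies
  RV32 (+a):  call __atomic_load_8   ; libcall, SFB cannot apply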
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
index c558931eb5a48..f4aa40185ed9c 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
@@ -1,200 +1,86 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-ialu | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-ialu | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-iload | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
-define i32 @test_i8_s_4(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i8_s_4(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_s_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s1, .LBB0_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB0_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB0_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB0_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB0_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB0_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s1, .LBB0_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB0_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 24
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB0_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s1, .LBB0_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB0_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB0_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB0_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB0_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -204,185 +90,77 @@ entry:
ret i32 %res
}
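
Note: in this file the ordering is seq_cst (the old libcall passed `li a1, 5`, i.e. __ATOMIC_SEQ_CST), so with +a the load is bracketed by fences instead of becoming a bare load. A reduced sketch of the pattern, names hypothetical:

define i32 @seqcst_sfb_sketch(ptr %base, i1 %x, i32 %b) nounwind {
entry:
  %addr = getelementptr i8, ptr %base, i32 4
  %v = load atomic i8, ptr %addr seq_cst, align 1
  %ext = sext i8 %v to i32
  %res = select i1 %x, i32 %ext, i32 %b
  ret i32 %res
}

This lowers to `fence rw, rw; lb; fence r, rw` around the access; in the SFB variants the trailing fence sinks below the short forward branch, as the .LBB0_2 checks above show.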
-define i32 @test_i8_z_4(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i8_z_4(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_z_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s1, .LBB1_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB1_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB1_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB1_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB1_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s1, .LBB1_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB1_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: zext.b s0, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB1_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s1, .LBB1_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB1_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB1_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB1_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s0, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB1_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -392,191 +170,77 @@ entry:
ret i32 %res
}
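For orientation, each test in this file follows a single pattern. The hunk headers above elide the function bodies; reconstructed from the CHECK lines, the zero-extending i8 case is approximately the following (a sketch, with illustrative value names rather than the file's actual ones):

    define i32 @test_i8_z_4(ptr %base, i1 %x, i32 %b) nounwind {
    entry:
      %addr = getelementptr i8, ptr %base, i32 4       ; compute base + 4
      %v = load atomic i8, ptr %addr seq_cst, align 1  ; fence rw, rw; lbu; fence r, rw
      %conv = zext i8 %v to i32                        ; the zext folds into the lbu
      %res = select i1 %x, i32 %conv, i32 %b           ; becomes the short forward branch
      ret i32 %res
    }

The signed variants use `sext` instead, which folds into `lb`/`lh` the same way.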
-define i32 @test_i16_s_4(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i16_s_4(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_s_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s1, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB2_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB2_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB2_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB2_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB2_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s1, .LBB2_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB2_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB2_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB2_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB2_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB2_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB2_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB2_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
@@ -586,191 +250,77 @@ entry:
ret i32 %res
}
-define i32 @test_i16_z_4(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i16_z_4(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_z_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s1, .LBB3_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB3_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB3_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB3_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB3_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB3_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s1, .LBB3_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB3_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srli s0, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB3_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB3_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB3_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB3_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB3_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srli s0, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB3_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
@@ -780,179 +330,77 @@ entry:
ret i32 %res
}
-define i32 @test_i32_4(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i32_4(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i32_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s1, a1, 1
-; RV32I-NEXT: addi a0, a0, 16
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_4
-; RV32I-NEXT: bnez s1, .LBB4_2
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB4_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB4_2: # %entry
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 16
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_4
-; RV64I-NEXT: bnez s1, .LBB4_2
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB4_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB4_2: # %entry
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: andi s1, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 16
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_4
-; RV32I-SFB-NEXT: bnez s1, .LBB4_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB4_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB4_2: # %entry
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 16
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_4
-; RV64I-SFB-NEXT: bnez s1, .LBB4_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB4_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB4_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_4
-; RV32I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB4_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_4
-; RV64I-SFBILOAD-NEXT: bnez s1, .LBB4_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB4_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4
@@ -961,257 +409,83 @@ entry:
ret i32 %res
}
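Worth noting: in all of these tests the RV32I-SFBILOAD/RV64I-SFBILOAD output is byte-for-byte identical to the plain SFB output. The loads here are atomic, and folding one into the new conditional-load pseudos (`PseudoCCLB`, `PseudoCCLW`, ...) would suppress an ordered access on one path, so the new transform apparently does not fire and only the generic branch-over-`mv` short forward branch applies.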
-define i32 @test_i8_s_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i8_s_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_s_store_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB5_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB5_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB5_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_store_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB5_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB5_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB5_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_store_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s3, .LBB5_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB5_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s2, a0, 24
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB5_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_store_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s3, .LBB5_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB5_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB5_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_store_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB5_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s2, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_store_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB5_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB5_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -1222,251 +496,83 @@ entry:
ret i32 %res
}
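The `_store_` variants add an unrelated store next to the select, and its placement is the detail under test: the baseline RV32I/RV64I runs emit `sw a4, 0(a3)` before the branch, while every SFB run sinks it past the conditional block so that the predicated region stays a single `mv`. A sketch of the presumed IR (again reconstructed, not copied from the file):

    define i32 @test_i8_s_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
    entry:
      %addr = getelementptr i8, ptr %base, i32 4
      %v = load atomic i8, ptr %addr seq_cst, align 1
      store i32 %c, ptr %base1
      %conv = sext i8 %v to i32
      %res = select i1 %x, i32 %conv, i32 %b
      ret i32 %res
    }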
-define i32 @test_i8_z_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i8_z_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_z_store_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB6_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB6_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_store_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB6_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB6_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB6_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_store_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s3, .LBB6_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB6_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: zext.b s2, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB6_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_store_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s3, .LBB6_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB6_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB6_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_store_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s2, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_store_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB6_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB6_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -1477,257 +583,83 @@ entry:
ret i32 %res
}
-define i32 @test_i16_s_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i16_s_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_s_store_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB7_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB7_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB7_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_store_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB7_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB7_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB7_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_store_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s3, .LBB7_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB7_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s2, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB7_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_store_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB7_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB7_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB7_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_store_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB7_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s2, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_store_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB7_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB7_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
@@ -1738,257 +670,83 @@ entry:
ret i32 %res
}
-define i32 @test_i16_z_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i16_z_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_z_store_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB8_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB8_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB8_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_store_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB8_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB8_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB8_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_store_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s3, .LBB8_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB8_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srli s2, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB8_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_store_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB8_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB8_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB8_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_store_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srli s2, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_store_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB8_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB8_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
@@ -1999,245 +757,83 @@ entry:
ret i32 %res
}
-define i32 @test_i32_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i32_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i32_store_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: andi s3, a1, 1
-; RV32I-NEXT: addi a0, a0, 16
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_4
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: bnez s3, .LBB9_2
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB9_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB9_2: # %entry
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_store_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 16
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_4
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB9_2
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB9_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB9_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_store_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: andi s3, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 16
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_4
-; RV32I-SFB-NEXT: bnez s3, .LBB9_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB9_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB9_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_store_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 16
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_4
-; RV64I-SFB-NEXT: bnez s3, .LBB9_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB9_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB9_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_store_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_4
-; RV32I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_store_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_4
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB9_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4
@@ -2247,218 +843,92 @@ entry:
ret i32 %res
}
-define i64 @test_i8_s_1_4(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i8_s_1_4(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_s_1_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s2, .LBB10_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 24
-; RV32I-NEXT: .LBB10_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB10_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB10_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_1_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB10_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB10_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB10_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_1_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s2, .LBB10_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB10_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s1, a0, 24
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB10_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB10_4
+; RV32I-SFB-NEXT: beqz a1, .LBB10_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB10_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_1_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s1, .LBB10_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB10_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB10_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_1_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s1, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB10_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_1_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB10_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB10_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -2468,212 +938,89 @@ entry:
ret i64 %res
}
-define i64 @test_i8_z_1_4(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i8_z_1_4(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_z_1_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s2, .LBB11_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB11_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB11_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_1_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB11_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB11_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB11_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_1_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s2, .LBB11_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB11_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB11_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB11_4
+; RV32I-SFB-NEXT: beqz a1, .LBB11_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: zext.b s1, a0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB11_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_1_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s1, .LBB11_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB11_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB11_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_1_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB11_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB11_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB11_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s1, a0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_1_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB11_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB11_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -2683,218 +1030,92 @@ entry:
ret i64 %res
}
-define i64 @test_i16_s_1_4(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i16_s_1_4(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_s_1_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s2, .LBB12_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 16
-; RV32I-NEXT: .LBB12_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB12_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB12_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_1_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB12_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB12_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB12_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_1_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s2, .LBB12_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: beqz a1, .LBB12_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s1, a0, 16
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB12_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB12_4
+; RV32I-SFB-NEXT: beqz a1, .LBB12_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB12_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_1_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB12_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB12_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB12_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_1_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s1, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB12_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_1_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB12_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB12_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
@@ -2904,218 +1125,89 @@ entry:
ret i64 %res
}
-define i64 @test_i16_z_1_4(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i16_z_1_4(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_z_1_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s2, .LBB13_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB13_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB13_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_1_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB13_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB13_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB13_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_1_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s2, .LBB13_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB13_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB13_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB13_4
+; RV32I-SFB-NEXT: beqz a1, .LBB13_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srli s1, a0, 16
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB13_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_1_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB13_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB13_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB13_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_1_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB13_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB13_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB13_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srli s1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_1_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB13_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB13_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
@@ -3125,19 +1217,14 @@ entry:
ret i64 %res
}
-define i64 @test_i32_z_1_4(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i32_z_1_4(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i32_z_1_4:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a3
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: andi s2, a1, 1
@@ -3157,24 +1244,15 @@ define i64 @test_i32_z_1_4(ptr %base, i1 %x, i64 %b) {
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_1_4:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: mv s0, a2
; RV64I-NEXT: andi s1, a1, 1
; RV64I-NEXT: addi a1, a0, 16
@@ -3190,25 +1268,16 @@ define i64 @test_i32_z_1_4(ptr %base, i1 %x, i64 %b) {
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_1_4:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
; RV32I-SFB-NEXT: mv s0, a3
; RV32I-SFB-NEXT: mv s1, a2
; RV32I-SFB-NEXT: andi s2, a1, 1
@@ -3231,24 +1300,15 @@ define i64 @test_i32_z_1_4(ptr %base, i1 %x, i64 %b) {
; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_1_4:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
; RV64I-SFB-NEXT: mv s0, a2
; RV64I-SFB-NEXT: andi s1, a1, 1
; RV64I-SFB-NEXT: addi a1, a0, 16
@@ -3264,25 +1324,16 @@ define i64 @test_i32_z_1_4(ptr %base, i1 %x, i64 %b) {
; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_1_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
; RV32I-SFBILOAD-NEXT: mv s0, a3
; RV32I-SFBILOAD-NEXT: mv s1, a2
; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
@@ -3305,24 +1356,15 @@ define i64 @test_i32_z_1_4(ptr %base, i1 %x, i64 %b) {
; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_1_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
; RV64I-SFBILOAD-NEXT: mv s0, a2
; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
@@ -3338,11 +1380,7 @@ define i64 @test_i32_z_1_4(ptr %base, i1 %x, i64 %b) {
; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
@@ -3352,19 +1390,14 @@ entry:
ret i64 %res
}
-define i64 @test_i64_1_4(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i64_1_4(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i64_1_4:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a3
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: andi s2, a1, 1
@@ -3380,55 +1413,28 @@ define i64 @test_i64_1_4(ptr %base, i1 %x, i64 %b) {
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i64_1_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a0, a0, 32
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_8
-; RV64I-NEXT: bnez s1, .LBB15_2
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB15_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB15_2: # %entry
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i64_1_4:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 16
; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
; RV32I-SFB-NEXT: mv s0, a3
; RV32I-SFB-NEXT: mv s1, a2
; RV32I-SFB-NEXT: andi s2, a1, 1
@@ -3447,55 +1453,28 @@ define i64 @test_i64_1_4(ptr %base, i1 %x, i64 %b) {
; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i64_1_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 32
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_8
-; RV64I-SFB-NEXT: bnez s1, .LBB15_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB15_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB15_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i64_1_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 16
; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
; RV32I-SFBILOAD-NEXT: mv s0, a3
; RV32I-SFBILOAD-NEXT: mv s1, a2
; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
@@ -3514,41 +1493,19 @@ define i64 @test_i64_1_4(ptr %base, i1 %x, i64 %b) {
; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i64_1_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_8
-; RV64I-SFBILOAD-NEXT: bnez s1, .LBB15_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB15_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4
@@ -3557,302 +1514,101 @@ entry:
ret i64 %res
}
-define i64 @test_i8_s_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i8_s_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_s_store_64_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB16_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 24
-; RV32I-NEXT: .LBB16_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB16_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB16_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_store_64_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB16_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB16_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB16_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_store_64_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s5, .LBB16_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB16_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s4, a0, 24
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB16_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB16_4
+; RV32I-SFB-NEXT: beqz a1, .LBB16_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB16_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_store_64_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s3, .LBB16_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB16_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB16_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s4, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB16_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB16_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -3863,296 +1619,98 @@ entry:
ret i64 %res
}
-define i64 @test_i8_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i8_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_z_store_64_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB17_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB17_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB17_2: # %entry
-; RV32I-NEXT: addi a1, s5, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_store_64_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB17_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB17_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB17_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_store_64_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s5, .LBB17_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB17_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB17_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB17_4
+; RV32I-SFB-NEXT: bnez a1, .LBB17_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: zext.b s4, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB17_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_store_64_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s3, .LBB17_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB17_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB17_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB17_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB17_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s4, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB17_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -4163,302 +1721,101 @@ entry:
ret i64 %res
}
-define i64 @test_i16_s_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i16_s_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_s_store_64_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB18_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 16
-; RV32I-NEXT: .LBB18_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB18_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB18_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_store_64_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB18_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB18_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB18_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_store_64_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s5, .LBB18_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB18_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s4, a0, 16
-; RV32I-SFB-NEXT: .LBB18_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB18_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s3, a0, 31
-; RV32I-SFB-NEXT: .LBB18_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_store_64_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB18_2
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB18_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB18_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB18_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB18_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB18_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s4, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB18_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB18_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
  %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 i16 elements (byte offset 8)
@@ -4469,302 +1826,98 @@ entry:
ret i64 %res
}
-define i64 @test_i16_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i16_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_z_store_64_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB19_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB19_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB19_2: # %entry
-; RV32I-NEXT: addi a1, s5, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_store_64_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB19_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB19_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB19_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_store_64_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s5, .LBB19_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB19_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB19_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB19_4
+; RV32I-SFB-NEXT: bnez a1, .LBB19_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srli s4, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB19_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_store_64_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB19_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB19_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB19_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB19_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB19_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srli s4, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB19_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
  %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 i16 elements (byte offset 8)
@@ -4775,11 +1928,10 @@ entry:
ret i64 %res
}
-define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i32_z_store_64_4:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -4787,13 +1939,6 @@ define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
; RV32I-NEXT: mv s2, a6
; RV32I-NEXT: mv s3, a5
; RV32I-NEXT: mv s4, a4
@@ -4821,31 +1966,17 @@ define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_store_64_4:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
; RV64I-NEXT: mv s1, a4
; RV64I-NEXT: mv s2, a3
; RV64I-NEXT: mv s0, a2
@@ -4866,19 +1997,12 @@ define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_store_64_4:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -4886,13 +2010,6 @@ define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
; RV32I-SFB-NEXT: mv s0, a6
; RV32I-SFB-NEXT: mv s1, a5
; RV32I-SFB-NEXT: mv s2, a4
@@ -4923,31 +2040,17 @@ define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_store_64_4:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
; RV64I-SFB-NEXT: mv s0, a4
; RV64I-SFB-NEXT: mv s1, a3
; RV64I-SFB-NEXT: mv s2, a2
@@ -4968,19 +2071,12 @@ define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -4988,13 +2084,6 @@ define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
; RV32I-SFBILOAD-NEXT: mv s0, a6
; RV32I-SFBILOAD-NEXT: mv s1, a5
; RV32I-SFBILOAD-NEXT: mv s2, a4
@@ -5025,31 +2114,17 @@ define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
; RV64I-SFBILOAD-NEXT: mv s0, a4
; RV64I-SFBILOAD-NEXT: mv s1, a3
; RV64I-SFBILOAD-NEXT: mv s2, a2
@@ -5070,13 +2145,7 @@ define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c)
; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV64I-SFBILOAD-NEXT: ret
entry:
  %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4 i32 elements (byte offset 16)
@@ -5087,11 +2156,10 @@ entry:
ret i64 %res
}
-define i64 @test_i64_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i64_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i64_store_64_4:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -5099,13 +2167,6 @@ define i64 @test_i64_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: .cfi_offset s0, -8
-; RV32I-NEXT: .cfi_offset s1, -12
-; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: .cfi_offset s3, -20
-; RV32I-NEXT: .cfi_offset s4, -24
-; RV32I-NEXT: .cfi_offset s5, -28
; RV32I-NEXT: mv s2, a6
; RV32I-NEXT: mv s3, a5
; RV32I-NEXT: mv s4, a4
@@ -5129,61 +2190,25 @@ define i64 @test_i64_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: .cfi_restore s0
-; RV32I-NEXT: .cfi_restore s1
-; RV32I-NEXT: .cfi_restore s2
-; RV32I-NEXT: .cfi_restore s3
-; RV32I-NEXT: .cfi_restore s4
-; RV32I-NEXT: .cfi_restore s5
; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i64_store_64_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: .cfi_def_cfa_offset 48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: .cfi_offset s0, -16
-; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: .cfi_offset s2, -32
-; RV64I-NEXT: .cfi_offset s3, -40
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a0, a0, 32
-; RV64I-NEXT: li a1, 5
-; RV64I-NEXT: call __atomic_load_8
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB21_2
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB21_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB21_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: .cfi_restore s0
-; RV64I-NEXT: .cfi_restore s1
-; RV64I-NEXT: .cfi_restore s2
-; RV64I-NEXT: .cfi_restore s3
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i64_store_64_4:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -5191,13 +2216,6 @@ define i64 @test_i64_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: .cfi_offset ra, -4
-; RV32I-SFB-NEXT: .cfi_offset s0, -8
-; RV32I-SFB-NEXT: .cfi_offset s1, -12
-; RV32I-SFB-NEXT: .cfi_offset s2, -16
-; RV32I-SFB-NEXT: .cfi_offset s3, -20
-; RV32I-SFB-NEXT: .cfi_offset s4, -24
-; RV32I-SFB-NEXT: .cfi_offset s5, -28
; RV32I-SFB-NEXT: mv s0, a6
; RV32I-SFB-NEXT: mv s1, a5
; RV32I-SFB-NEXT: mv s2, a4
@@ -5224,61 +2242,25 @@ define i64 @test_i64_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: .cfi_restore ra
-; RV32I-SFB-NEXT: .cfi_restore s0
-; RV32I-SFB-NEXT: .cfi_restore s1
-; RV32I-SFB-NEXT: .cfi_restore s2
-; RV32I-SFB-NEXT: .cfi_restore s3
-; RV32I-SFB-NEXT: .cfi_restore s4
-; RV32I-SFB-NEXT: .cfi_restore s5
; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i64_store_64_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: .cfi_offset ra, -8
-; RV64I-SFB-NEXT: .cfi_offset s0, -16
-; RV64I-SFB-NEXT: .cfi_offset s1, -24
-; RV64I-SFB-NEXT: .cfi_offset s2, -32
-; RV64I-SFB-NEXT: .cfi_offset s3, -40
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a0, a0, 32
-; RV64I-SFB-NEXT: li a1, 5
-; RV64I-SFB-NEXT: call __atomic_load_8
-; RV64I-SFB-NEXT: bnez s3, .LBB21_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB21_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB21_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: .cfi_restore ra
-; RV64I-SFB-NEXT: .cfi_restore s0
-; RV64I-SFB-NEXT: .cfi_restore s1
-; RV64I-SFB-NEXT: .cfi_restore s2
-; RV64I-SFB-NEXT: .cfi_restore s3
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i64_store_64_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 32
; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
@@ -5286,13 +2268,6 @@ define i64 @test_i64_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: .cfi_offset ra, -4
-; RV32I-SFBILOAD-NEXT: .cfi_offset s0, -8
-; RV32I-SFBILOAD-NEXT: .cfi_offset s1, -12
-; RV32I-SFBILOAD-NEXT: .cfi_offset s2, -16
-; RV32I-SFBILOAD-NEXT: .cfi_offset s3, -20
-; RV32I-SFBILOAD-NEXT: .cfi_offset s4, -24
-; RV32I-SFBILOAD-NEXT: .cfi_offset s5, -28
; RV32I-SFBILOAD-NEXT: mv s0, a6
; RV32I-SFBILOAD-NEXT: mv s1, a5
; RV32I-SFBILOAD-NEXT: mv s2, a4
@@ -5319,55 +2294,20 @@ define i64 @test_i64_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: .cfi_restore ra
-; RV32I-SFBILOAD-NEXT: .cfi_restore s0
-; RV32I-SFBILOAD-NEXT: .cfi_restore s1
-; RV32I-SFBILOAD-NEXT: .cfi_restore s2
-; RV32I-SFBILOAD-NEXT: .cfi_restore s3
-; RV32I-SFBILOAD-NEXT: .cfi_restore s4
-; RV32I-SFBILOAD-NEXT: .cfi_restore s5
; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i64_store_64_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: .cfi_offset ra, -8
-; RV64I-SFBILOAD-NEXT: .cfi_offset s0, -16
-; RV64I-SFBILOAD-NEXT: .cfi_offset s1, -24
-; RV64I-SFBILOAD-NEXT: .cfi_offset s2, -32
-; RV64I-SFBILOAD-NEXT: .cfi_offset s3, -40
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV64I-SFBILOAD-NEXT: li a1, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load_8
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB21_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB21_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: .cfi_restore ra
-; RV64I-SFBILOAD-NEXT: .cfi_restore s0
-; RV64I-SFBILOAD-NEXT: .cfi_restore s1
-; RV64I-SFBILOAD-NEXT: .cfi_restore s2
-; RV64I-SFBILOAD-NEXT: .cfi_restore s3
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
  %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4 i64 elements (byte offset 32)
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
index 37f7a3020b820..90899c690516a 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
@@ -10,7 +10,7 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
-define i32 @test_i8_s_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
+define i32 @test_i8_s_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i8_s_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lb a4, 4(a0)
@@ -90,7 +90,7 @@ entry:
ret i32 %res1
}
-define i32 @test_i8_z_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
+define i32 @test_i8_z_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i8_z_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lbu a4, 4(a0)
@@ -170,7 +170,7 @@ entry:
ret i32 %res1
}
-define i32 @test_i16_s_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
+define i32 @test_i16_s_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i16_s_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lh a4, 8(a0)
@@ -250,7 +250,7 @@ entry:
ret i32 %res1
}
-define i32 @test_i16_z_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
+define i32 @test_i16_z_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i16_z_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lhu a4, 8(a0)
@@ -330,7 +330,7 @@ entry:
ret i32 %res1
}
-define i32 @test_i32_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) {
+define i32 @test_i32_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i32_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lw a4, 16(a0)
@@ -410,7 +410,7 @@ entry:
}
-define i64 @test_i8_s_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+define i64 @test_i8_s_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i8_s_1_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lb a6, 4(a0)
@@ -514,7 +514,7 @@ entry:
ret i64 %res1
}
-define i64 @test_i8_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+define i64 @test_i8_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i8_z_1_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lbu a6, 4(a0)
@@ -614,7 +614,7 @@ entry:
ret i64 %res1
}
-define i64 @test_i16_s_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+define i64 @test_i16_s_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i16_s_1_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lh a6, 8(a0)
@@ -718,7 +718,7 @@ entry:
ret i64 %res1
}
-define i64 @test_i16_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+define i64 @test_i16_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i16_z_1_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lhu a6, 8(a0)
@@ -818,7 +818,7 @@ entry:
ret i64 %res1
}
-define i64 @test_i32_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+define i64 @test_i32_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i32_z_1_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lw a6, 16(a0)
@@ -918,7 +918,7 @@ entry:
ret i64 %res1
}
-define i64 @test_i64_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) {
+define i64 @test_i64_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
; RV32I-LABEL: test_i64_1_volatile:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lw a7, 32(a0)
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
index 6c500468bb187..984e101e8a937 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
@@ -10,7 +10,7 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
-define i32 @test_i8_s(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i8_s(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_s:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -78,7 +78,7 @@ entry:
ret i32 %res
}
-define i32 @test_i8_z(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i8_z(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_z:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -146,7 +146,7 @@ entry:
ret i32 %res
}
-define i32 @test_i16_s(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i16_s(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_s:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -214,7 +214,7 @@ entry:
ret i32 %res
}
-define i32 @test_i16_z(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i16_z(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_z:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -282,7 +282,7 @@ entry:
ret i32 %res
}
-define i32 @test_i32(ptr %base, i1 %x, i32 %b) {
+define i32 @test_i32(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-LABEL: test_i32:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -349,7 +349,7 @@ entry:
ret i32 %res
}
-define i32 @test_i8_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i8_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_s_store:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lb a0, 4(a0)
@@ -424,7 +424,7 @@ entry:
ret i32 %res
}
-define i32 @test_i8_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i8_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_z_store:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lbu a0, 4(a0)
@@ -499,7 +499,7 @@ entry:
ret i32 %res
}
-define i32 @test_i16_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i16_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_s_store:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lh a0, 8(a0)
@@ -574,7 +574,7 @@ entry:
ret i32 %res
}
-define i32 @test_i16_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i16_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_z_store:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lhu a0, 8(a0)
@@ -649,7 +649,7 @@ entry:
ret i32 %res
}
-define i32 @test_i32_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) {
+define i32 @test_i32_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i32_store:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lw a0, 16(a0)
@@ -723,7 +723,7 @@ entry:
ret i32 %res
}
-define i64 @test_i8_s_1(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i8_s_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_s_1:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -805,7 +805,7 @@ entry:
ret i64 %res
}
-define i64 @test_i8_z_1(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i8_z_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_z_1:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -885,7 +885,7 @@ entry:
ret i64 %res
}
-define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_s_1:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -967,7 +967,7 @@ entry:
ret i64 %res
}
-define i64 @test_i16_z_1(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i16_z_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_z_1:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -1047,7 +1047,7 @@ entry:
ret i64 %res
}
-define i64 @test_i32_z_1(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i32_z_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i32_z_1:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -1127,7 +1127,7 @@ entry:
ret i64 %res
}
-define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) {
+define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-LABEL: test_i64_1:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: andi a1, a1, 1
@@ -1208,7 +1208,7 @@ entry:
ret i64 %res
}
-define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_s_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lb a0, 4(a0)
@@ -1301,7 +1301,7 @@ entry:
ret i64 %res
}
-define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_z_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lbu a0, 4(a0)
@@ -1391,7 +1391,7 @@ entry:
ret i64 %res
}
-define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_s_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lh a0, 8(a0)
@@ -1484,7 +1484,7 @@ entry:
ret i64 %res
}
-define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_z_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lhu a0, 8(a0)
@@ -1574,7 +1574,7 @@ entry:
ret i64 %res
}
-define i64 @test_i32_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i32_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i32_z_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lw a0, 16(a0)
@@ -1664,7 +1664,7 @@ entry:
ret i64 %res
}
-define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) {
+define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i64_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: mv a7, a1
>From f959b128506a1c6ef0d04c7e993ce6720a47f4eb Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Mon, 8 Dec 2025 11:20:54 +0530
Subject: [PATCH 08/11] fixup! Address comments
Change-Id: I80edab151862240167562e535385b207b811e546
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 3 +-
...-branch-opt-load-atomic-acquire-seq_cst.ll | 4284 ++++++++++++++
...-forward-branch-opt-load-atomic-acquire.ll | 2205 -------
...orward-branch-opt-load-atomic-monotonic.ll | 2091 -------
...-forward-branch-opt-load-atomic-seq_cst.ll | 2319 --------
.../short-forward-branch-opt-load-volatile.ll | 1022 ----
.../RISCV/short-forward-branch-opt-load.ll | 5115 ++++++++++++++++-
7 files changed, 9218 insertions(+), 7821 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire-seq_cst.ll
delete mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
delete mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
delete mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
delete mode 100644 llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1940d36af4dcd..11688476a2554 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -934,8 +934,7 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
return nullptr;
MachineRegisterInfo &MRI = MF.getRegInfo();
- bool Invert =
- (MRI.getVRegDef(MI.getOperand(4).getReg()) == &LoadMI) ? true : false;
+ bool Invert = MRI.getVRegDef(MI.getOperand(4).getReg()) == &LoadMI;
MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
Register DestReg = MI.getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
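The hunk above is a pure cleanup: an equality comparison already yields a bool, so wrapping it in `? true : false` is redundant. A minimal standalone sketch of the idiom (hypothetical helper, not the LLVM API; shown only to illustrate the simplification):

    #include <cassert>

    // Before: bool Invert = (Def == &LoadMI) ? true : false;
    // After:  bool Invert = (Def == &LoadMI);  // identical truth value
    static bool sameDef(const int *Def, const int *LoadMI) {
      return Def == LoadMI;
    }

    int main() {
      int A = 0, B = 0;
      assert(sameDef(&A, &A));   // comparison is true
      assert(!sameDef(&A, &B));  // comparison is false
      return 0;
    }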
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire-seq_cst.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire-seq_cst.ll
new file mode 100644
index 0000000000000..d4e418ebb8fd3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire-seq_cst.ll
@@ -0,0 +1,4284 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-ialu | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-ialu | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-iload | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
+
+define i32 @test_i8_s_3(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i8_s_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB0_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB0_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB0_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB0_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB0_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB0_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB0_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB0_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z_3(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i8_z_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB1_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB1_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB1_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB1_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB1_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB1_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB1_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_3(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i16_s_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB2_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB2_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB2_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB2_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB2_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB2_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB2_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB2_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8 bytes (element 4)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_3(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i16_z_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB3_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB3_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB3_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB3_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB3_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB3_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB3_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB3_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8 bytes (element 4)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_3(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i32_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB4_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB4_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB4_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB4_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB4_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB4_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB4_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB4_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB4_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB4_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 16 bytes (element 4)
+ %val = load atomic i32, ptr %addr acquire, align 4 ; load 32-bit value
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_s_store_3(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i8_s_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB5_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB5_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB5_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB5_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB5_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB5_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB5_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB5_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB5_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z_store_3(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i8_z_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB6_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB6_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB6_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB6_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB6_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB6_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB6_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB6_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_store_3(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i16_s_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB7_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB7_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB7_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB7_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB7_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB7_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB7_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB7_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB7_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8 bytes (element 4)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_store_3(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i16_z_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB8_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB8_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB8_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB8_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB8_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB8_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB8_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB8_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB8_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8 bytes (element 4)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_store_3(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i32_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB9_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB9_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB9_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB9_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB9_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB9_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB9_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB9_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 16 bytes (element 4)
+ %val = load atomic i32, ptr %addr acquire, align 4 ; load 32-bit value
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
+define i64 @test_i8_s_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i8_s_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB10_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB10_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB10_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB10_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB10_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB10_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB10_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB10_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB10_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB10_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB10_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_z_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i8_z_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB11_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB11_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB11_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB11_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB11_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB11_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB11_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB11_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB11_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB11_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB11_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB11_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB11_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i16_s_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB12_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB12_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB12_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB12_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB12_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB12_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB12_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB12_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB12_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB12_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB12_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 8 bytes (element 4)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i16_z_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB13_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB13_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB13_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB13_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB13_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB13_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB13_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB13_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB13_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB13_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB13_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB13_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB13_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 8 bytes (element 4)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i32_z_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i32_z_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: beqz s2, .LBB14_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lw s1, 12(sp)
+; RV32I-NEXT: .LBB14_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: beqz s1, .LBB14_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lwu s0, 4(sp)
+; RV64I-NEXT: .LBB14_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: addi a2, sp, 12
+; RV32I-SFB-NEXT: li a3, 2
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 12(sp)
+; RV32I-SFB-NEXT: bnez s2, .LBB14_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB14_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB14_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB14_4: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 2
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s1, .LBB14_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB14_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
+; RV32I-SFBILOAD-NEXT: li a3, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
+; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB14_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
+; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 16 bytes (element 4)
+ %val = load atomic i32, ptr %addr acquire, align 2 ; under-aligned 32-bit load (align 2 < 4), so it lowers to an __atomic_load libcall
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i64_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i64_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: bnez s2, .LBB15_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB15_2: # %entry
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB15_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB15_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: bnez s2, .LBB15_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB15_2: # %entry
+; RV32I-SFB-NEXT: bnez s2, .LBB15_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: .LBB15_4: # %entry
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB15_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB15_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: .LBB15_4: # %entry
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB15_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 32 bytes (element 4)
+ %val = load atomic i64, ptr %addr acquire, align 8 ; load 64-bit value
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_s_store_64_3(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i8_s_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB16_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB16_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB16_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB16_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: beqz a1, .LBB16_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB16_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB16_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB16_4: # %entry
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB16_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB16_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_z_store_64_3(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i8_z_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB17_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB17_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB17_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB17_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB17_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB17_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB17_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB17_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB17_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB17_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB17_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_store_64_3(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i16_s_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB18_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB18_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB18_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB18_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: beqz a1, .LBB18_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB18_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB18_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB18_4: # %entry
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB18_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB18_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 8 bytes (element 4)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_store_64_3(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i16_z_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB19_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB19_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB19_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB19_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB19_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB19_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB19_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB19_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB19_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB19_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB19_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 8 bytes (element 4)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i32_z_store_64_3(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i32_z_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s3, a6
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: mv s5, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: mv a2, sp
+; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: lw a0, 0(sp)
+; RV32I-NEXT: sw s4, 0(s5)
+; RV32I-NEXT: sw s3, 4(s5)
+; RV32I-NEXT: bnez s1, .LBB20_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: .LBB20_2: # %entry
+; RV32I-NEXT: addi a1, s1, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: lwu a0, 4(sp)
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB20_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB20_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: mv s5, a1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: mv a2, sp
+; RV32I-SFB-NEXT: li a3, 2
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 0(sp)
+; RV32I-SFB-NEXT: beqz s5, .LBB20_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB20_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB20_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB20_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 2
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s3, .LBB20_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB20_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: mv s5, a1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: mv a2, sp
+; RV32I-SFBILOAD-NEXT: li a3, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB20_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB20_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB20_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB20_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB20_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB20_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i64 4 ; compute element 4 (base + 16 bytes)
+  %val = load atomic i32, ptr %addr acquire, align 2 ; under-aligned 32-bit load: lowers to an __atomic_load libcall
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
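
The align 2 here is the point of the test: an i32 atomic load below its natural alignment cannot be inlined, so both targets call the generic __atomic_load runtime routine and then reload the buffered result with a plain lw (or lwu for the zext). The call takes the object size, source pointer, destination buffer, and a memory-order constant (2 for acquire, 5 for seq_cst, matching the li a3 values in the output). A sketch of the two lowerings, with %p standing in for the test's computed address:

    %v.call   = load atomic i32, ptr %p acquire, align 2 ; -> call __atomic_load(4, %p, tmp, 2)
    %v.inline = load atomic i32, ptr %p acquire, align 4 ; -> inline lw + fence r, rw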
+
+define i64 @test_i64_store_64_3(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i64_store_64_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s5, a1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: bnez s5, .LBB21_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB21_2: # %entry
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_store_64_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB21_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB21_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_store_64_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: mv s5, a1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 2
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: bnez s5, .LBB21_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB21_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB21_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: .LBB21_4: # %entry
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_store_64_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB21_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB21_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_store_64_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: mv s5, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB21_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: .LBB21_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_store_64_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB21_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i64, ptr %base, i64 4 ; compute element 4 (base + 32 bytes)
+ %val = load atomic i64, ptr %addr acquire, align 8 ; load 64-bit value
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
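
For i64 the split is driven by XLEN rather than alignment: RV64 inlines the naturally aligned load as ld plus the acquire fence, while RV32 has no 64-bit atomic access and calls __atomic_load_8 with the ordering constant in a1. The RV32 result is a register pair, which is why the SFB output needs two guarded moves, one per half. Sketch (value names illustrative):

    %v = load atomic i64, ptr %p acquire, align 8
    ; RV64: ld + fence r, rw (inline)
    ; RV32: call __atomic_load_8(%p, 2), result returned in a0:a1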
+
+define i32 @test_i8_s_4(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i8_s_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB22_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB22_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB22_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB22_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB22_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB22_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB22_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB22_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB22_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB22_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB22_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB22_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
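
From here the tests switch to seq_cst, which these runs lower to a leading fence rw, rw, the plain load, and a trailing fence r, rw. Note that the SFB outputs sink the trailing fence below the predicated move; this is sound because a register-to-register mv performs no memory access, so it cannot be reordered with anything the fence constrains. Sketch of the mapping:

    %v = load atomic i8, ptr %p seq_cst, align 1
    ; -> fence rw, rw ; lb ... ; fence r, rw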
+
+define i32 @test_i8_z_4(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i8_z_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB23_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB23_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB23_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB23_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB23_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB23_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB23_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB23_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB23_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB23_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB23_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB23_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_4(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i16_s_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB24_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB24_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB24_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB24_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB24_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB24_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB24_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB24_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB24_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB24_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB24_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB24_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; compute element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_4(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i16_z_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB25_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB25_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB25_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB25_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB25_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB25_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB25_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB25_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB25_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB25_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB25_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB25_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; compute element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_4(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i32_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB26_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB26_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB26_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB26_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB26_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB26_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB26_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB26_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB26_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB26_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB26_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB26_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i32 4 ; compute element 4 (base + 16 bytes)
+ %val = load atomic i32, ptr %addr seq_cst, align 4 ; load 32-bit value
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_s_store_4(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i8_s_store_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB27_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB27_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB27_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB27_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB27_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB27_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB27_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB27_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB27_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB27_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB27_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB27_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z_store_4(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i8_z_store_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB28_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB28_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB28_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB28_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB28_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB28_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB28_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB28_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB28_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB28_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB28_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB28_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_store_4(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i16_s_store_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB29_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB29_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB29_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB29_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB29_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB29_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB29_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB29_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB29_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB29_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB29_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB29_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; compute element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_store_4(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i16_z_store_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB30_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB30_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB30_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB30_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB30_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB30_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB30_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB30_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB30_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB30_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB30_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB30_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; compute element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_store_4(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i32_store_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB31_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB31_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_store_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB31_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB31_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_store_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB31_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB31_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_store_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB31_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB31_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_store_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB31_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB31_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_store_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB31_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB31_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i32 4 ; compute element 4 (base + 16 bytes)
+ %val = load atomic i32, ptr %addr seq_cst, align 4 ; load 32-bit value
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
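
This group also shows a scheduling effect of the transform: the baseline issues the unrelated sw of %c before the branch, while the SFB outputs sink it below the predicated move so the branch body stays a single instruction. Sketch (%q stands in for %base1):

    store i32 %c, ptr %q                 ; baseline: issued before the branch
    %res = select i1 %x, i32 %v, i32 %b  ; SFB: branch over one mv, then the sw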
+
+define i64 @test_i8_s_1_4(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i8_s_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB32_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB32_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB32_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB32_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB32_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB32_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB32_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB32_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB32_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB32_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB32_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB32_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB32_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB32_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB32_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB32_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
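
These tests return i64, which changes the RV32 picture: the sign-extended result occupies a register pair whose high half is srai of the low half, so the SFB form needs two short forward branches, one selecting each half. Sketch:

    %ext = sext i8 %v to i64
    %res = select i1 %x, i64 %ext, i64 %b
    ; RV32 SFB: one branch guards "mv a2, a0" (low half),
    ;           another guards "srai a3, a0, 31" (high half)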
+
+define i64 @test_i8_z_1_4(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i8_z_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB33_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB33_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB33_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB33_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB33_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB33_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB33_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB33_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB33_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB33_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB33_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB33_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB33_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB33_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB33_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB33_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
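
For the zero-extended variant the baseline RV32 code avoids a second branch with a mask trick: because %x is zeroext, addi a1, a1, -1 yields 0 when x is 1 and all-ones when x is 0, so the following and picks either the zero high half of the extension or the high half of %b. Worked through:

    ; x = 1: 1 - 1 = 0x00000000 -> and with b.hi gives 0    (high half of zext)
    ; x = 0: 0 - 1 = 0xffffffff -> and with b.hi gives b.hi (the %b arm)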
+
+define i64 @test_i16_s_1_4(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i16_s_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB34_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB34_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB34_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB34_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB34_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB34_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB34_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB34_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB34_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB34_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB34_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB34_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB34_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB34_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB34_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB34_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4 ; compute element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_1_4(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i16_z_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB35_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB35_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB35_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB35_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB35_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB35_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB35_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB35_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB35_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB35_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB35_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB35_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB35_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB35_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB35_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB35_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4 ; compute element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i32_z_1_4(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i32_z_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: beqz s2, .LBB36_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lw s1, 12(sp)
+; RV32I-NEXT: .LBB36_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: beqz s1, .LBB36_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lwu s0, 4(sp)
+; RV64I-NEXT: .LBB36_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: addi a2, sp, 12
+; RV32I-SFB-NEXT: li a3, 5
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 12(sp)
+; RV32I-SFB-NEXT: bnez s2, .LBB36_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB36_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB36_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB36_4: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 5
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s1, .LBB36_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB36_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
+; RV32I-SFBILOAD-NEXT: li a3, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB36_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
+; RV32I-SFBILOAD-NEXT: .LBB36_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB36_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB36_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB36_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
+; RV64I-SFBILOAD-NEXT: .LBB36_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i64 4 ; compute element 4 (base + 16 bytes)
+  %val = load atomic i32, ptr %addr seq_cst, align 2 ; under-aligned 32-bit load: lowers to an __atomic_load libcall
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i64_1_4(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i64_1_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: bnez s2, .LBB37_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB37_2: # %entry
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_1_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB37_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB37_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_1_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: bnez s2, .LBB37_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB37_2: # %entry
+; RV32I-SFB-NEXT: bnez s2, .LBB37_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: .LBB37_4: # %entry
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_1_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB37_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB37_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_1_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB37_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: .LBB37_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB37_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: .LBB37_4: # %entry
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_1_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB37_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB37_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4 elements (byte offset 32)
+ %val = load atomic i64, ptr %addr seq_cst, align 8 ; load 64-bit value
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_s_store_64_4(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i8_s_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB38_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB38_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB38_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB38_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: beqz a1, .LBB38_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB38_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB38_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB38_4: # %entry
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB38_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB38_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB38_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB38_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB38_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB38_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB38_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB38_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_z_store_64_4(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i8_z_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB39_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB39_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB39_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB39_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB39_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB39_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB39_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB39_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB39_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB39_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB39_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB39_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB39_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB39_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB39_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB39_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_store_64_4(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i16_s_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB40_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB40_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB40_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB40_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: beqz a1, .LBB40_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB40_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB40_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB40_4: # %entry
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB40_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB40_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB40_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB40_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB40_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB40_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB40_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB40_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_store_64_4(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i16_z_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB41_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB41_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB41_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB41_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB41_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB41_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB41_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB41_4: # %entry
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB41_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB41_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB41_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB41_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB41_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB41_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB41_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB41_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i32_z_store_64_4(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i32_z_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s3, a6
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: mv s5, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: mv a2, sp
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: lw a0, 0(sp)
+; RV32I-NEXT: sw s4, 0(s5)
+; RV32I-NEXT: sw s3, 4(s5)
+; RV32I-NEXT: bnez s1, .LBB42_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: .LBB42_2: # %entry
+; RV32I-NEXT: addi a1, s1, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: lwu a0, 4(sp)
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB42_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB42_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: mv s5, a1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: mv a2, sp
+; RV32I-SFB-NEXT: li a3, 5
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 0(sp)
+; RV32I-SFB-NEXT: beqz s5, .LBB42_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB42_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB42_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB42_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 5
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s3, .LBB42_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB42_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: mv s5, a1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: mv a2, sp
+; RV32I-SFBILOAD-NEXT: li a3, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB42_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB42_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB42_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB42_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 5
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB42_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB42_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4 elements (byte offset 16)
+ %val = load atomic i32, ptr %addr seq_cst, align 2 ; under-aligned 32-bit load, lowered to an __atomic_load libcall
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i64_store_64_4(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i64_store_64_4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s5, a1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: bnez s5, .LBB43_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB43_2: # %entry
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_store_64_4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB43_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB43_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_store_64_4:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: mv s5, a1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 5
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: bnez s5, .LBB43_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB43_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB43_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: .LBB43_4: # %entry
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_store_64_4:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB43_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB43_2: # %entry
+; RV64I-SFB-NEXT: sd a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_store_64_4:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: mv s5, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 5
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB43_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB43_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB43_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: .LBB43_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_store_64_4:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB43_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB43_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4 elements (byte offset 32)
+ %val = load atomic i64, ptr %addr seq_cst, align 8 ; load 64-bit value
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
+
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
deleted file mode 100644
index 1ba01ac5225d3..0000000000000
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
+++ /dev/null
@@ -1,2205 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a | FileCheck %s --check-prefixes=RV32I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-ialu | \
-; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-ialu | \
-; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-iload | \
-; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload | \
-; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
-
-define i32 @test_i8_s_3(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i8_s_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB0_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB0_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB0_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB0_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB0_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB0_2: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB0_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB0_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB0_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB0_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
- %ext = sext i8 %val to i32 ; sign-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i8_z_3(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i8_z_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB1_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB1_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB1_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB1_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB1_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB1_2: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB1_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB1_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB1_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB1_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
- %ext = zext i8 %val to i32 ; zero-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_s_3(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i16_s_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB2_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB2_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB2_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB2_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB2_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB2_2: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB2_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB2_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB2_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB2_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
- %ext = sext i16 %val to i32 ; sign-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_z_3(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i16_z_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB3_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB3_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB3_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB3_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB3_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB3_2: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB3_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB3_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB3_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB3_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
- %ext = zext i16 %val to i32 ; zero-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i32_3(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i32_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lw a0, 16(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB4_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB4_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lw a0, 16(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB4_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB4_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB4_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB4_2: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lw a0, 16(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB4_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB4_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB4_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB4_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i32, ptr %addr acquire, align 4 ; load 32-bit value
- %res = select i1 %x, i32 %val, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i8_s_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i8_s_store_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB5_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB5_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_store_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB5_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB5_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_store_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: bnez a1, .LBB5_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB5_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_store_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB5_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB5_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_store_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB5_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_store_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB5_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
- %ext = sext i8 %val to i32 ; sign-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i8_z_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i8_z_store_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB6_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB6_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_store_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB6_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB6_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_store_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: bnez a1, .LBB6_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB6_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_store_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB6_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB6_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_store_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_store_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB6_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
- %ext = zext i8 %val to i32 ; zero-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_s_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i16_s_store_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB7_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB7_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_store_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB7_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB7_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_store_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: bnez a1, .LBB7_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB7_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_store_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB7_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB7_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_store_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB7_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_store_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB7_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
- %ext = sext i16 %val to i32 ; sign-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_z_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i16_z_store_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB8_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB8_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_store_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB8_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB8_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_store_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: bnez a1, .LBB8_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB8_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_store_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB8_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB8_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_store_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_store_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB8_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
- %ext = zext i16 %val to i32 ; zero-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i32_store_3(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i32_store_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lw a0, 16(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB9_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB9_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_store_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lw a0, 16(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB9_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB9_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_store_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: bnez a1, .LBB9_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB9_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_store_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lw a0, 16(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB9_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB9_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_store_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_store_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i32 4 ; address of element 4 (base + 16 bytes)
- %val = load atomic i32, ptr %addr acquire, align 4 ; load 32-bit value
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %val, i32 %b
- ret i32 %res
-}
-
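A note on the store_3 variants above: the independent store i32 %c, ptr %base1 gives the scheduler something to place around the branch shadow. The baseline prints the sw before the branch, while the SFB outputs sink it past the conditional move; the store neither feeds nor uses the select and stays after the acquire fence in both schedules, so the placements are equivalent. A minimal sketch of the pattern (function name is hypothetical):

define i32 @store_then_select_sketch(ptr %p, i1 %c, i32 %other, ptr %q, i32 %v) nounwind {
  %ld = load atomic i32, ptr %p acquire, align 4  ; true arm of the select
  store i32 %v, ptr %q                            ; unrelated store
  %r = select i1 %c, i32 %ld, i32 %other
  ret i32 %r
}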
-define i64 @test_i8_s_1_3(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i8_s_1_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB10_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB10_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_1_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB10_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB10_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_1_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB10_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB10_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB10_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB10_4: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_1_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB10_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB10_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_1_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_1_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB10_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
- %ext = sext i8 %val to i64 ; sign-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
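Note that even with short-forward-branch-iload enabled, the atomic lb above stays ahead of the branch in every configuration; only the plain integer ops (mv, srai) land in the shadow. Presumably the pass declines to predicate ordered loads, since conditionally skipping an acquire access would change its ordering behavior. The feature targets ordinary loads, as in this sketch (hypothetical name):

define i32 @plain_load_sketch(ptr %p, i1 %c, i32 %other) nounwind {
  %v = load i32, ptr %p, align 4  ; non-atomic load: eligible for the branch shadow
  %r = select i1 %c, i32 %v, i32 %other
  ret i32 %r
}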
-define i64 @test_i8_z_1_3(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i8_z_1_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB11_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB11_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_1_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB11_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB11_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_1_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB11_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB11_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB11_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB11_4: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_1_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB11_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB11_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_1_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB11_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB11_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_1_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB11_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
- %ext = zext i8 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
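For the zero-extending i64 cases on RV32, the high word of the result is either 0 or the high word of %b, so the baseline selects it branchlessly: addi a1, a1, -1 maps the masked condition 1 -> 0 and 0 -> -1, and the following and picks 0 or a3. The same computation in IR (a sketch, hypothetical name):

define i32 @hi_word_mask_sketch(i32 %x, i32 %bhi) nounwind {
  ; assumes %x has already been masked to 0 or 1
  %m = add i32 %x, -1    ; 1 -> 0, 0 -> -1 (all ones)
  %hi = and i32 %m, %bhi ; %x ? 0 : %bhi
  ret i32 %hi
}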
-define i64 @test_i16_s_1_3(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i16_s_1_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB12_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB12_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_1_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB12_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB12_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_1_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB12_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB12_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB12_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB12_4: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_1_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB12_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB12_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_1_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_1_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB12_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4 ; address of element 4 (base + 8 bytes)
- %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
- %ext = sext i16 %val to i64 ; sign-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_z_1_3(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i16_z_1_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB13_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB13_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_1_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB13_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB13_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_1_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB13_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB13_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB13_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB13_4: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_1_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB13_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB13_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_1_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB13_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB13_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_1_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB13_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4 ; address of element 4 (base + 8 bytes)
- %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
- %ext = zext i16 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i32_z_1_3(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i32_z_1_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: addi a2, sp, 12
-; RV32I-NEXT: li a3, 2
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: beqz s2, .LBB14_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lw s1, 12(sp)
-; RV32I-NEXT: .LBB14_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_z_1_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 2
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: beqz s1, .LBB14_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: lwu s0, 4(sp)
-; RV64I-NEXT: .LBB14_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_z_1_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: addi a2, sp, 12
-; RV32I-SFB-NEXT: li a3, 2
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 12(sp)
-; RV32I-SFB-NEXT: bnez s2, .LBB14_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: .LBB14_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB14_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li s0, 0
-; RV32I-SFB-NEXT: .LBB14_4: # %entry
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_z_1_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 2
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s1, .LBB14_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: .LBB14_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_z_1_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
-; RV32I-SFBILOAD-NEXT: li a3, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
-; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
-; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_z_1_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB14_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
-; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i64 4 ; address of element 4 (base + 16 bytes)
- %val = load atomic i32, ptr %addr acquire, align 2 ; under-aligned 32-bit load: lowers to the __atomic_load libcall
- %ext = zext i32 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
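The align 2 on the i32 atomic above is deliberate: an under-aligned atomic load cannot be a single lw, so it lowers to the generic __atomic_load libcall with the result passed back through a stack slot. The reload from that slot is an ordinary lw/lwu, which is exactly what the iload feature can predicate - see the lw s1, 12(sp) inside the RV32I-SFBILOAD branch shadow above. With natural alignment the libcall should disappear (sketch, hypothetical name):

define i64 @aligned_i32_z_sketch(ptr %p, i1 %c, i64 %other) nounwind {
  %v = load atomic i32, ptr %p acquire, align 4  ; naturally aligned: inline load + fence expected
  %e = zext i32 %v to i64
  %r = select i1 %c, i64 %e, i64 %other
  ret i64 %r
}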
-define i64 @test_i64_1_3(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i64_1_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_8
-; RV32I-NEXT: bnez s2, .LBB15_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: .LBB15_2: # %entry
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i64_1_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: ld a0, 32(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB15_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB15_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i64_1_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 32
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_8
-; RV32I-SFB-NEXT: bnez s2, .LBB15_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: .LBB15_2: # %entry
-; RV32I-SFB-NEXT: bnez s2, .LBB15_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: .LBB15_4: # %entry
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i64_1_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: ld a0, 32(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB15_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB15_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i64_1_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_8
-; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: .LBB15_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: .LBB15_4: # %entry
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i64_1_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB15_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i64, ptr %base, i64 4 ; address of element 4 (base + 32 bytes)
- %val = load atomic i64, ptr %addr acquire, align 8 ; load 64-bit value
- %res = select i1 %x, i64 %val, i64 %b
- ret i64 %res
-}
-
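With only the A extension, RV32 has no inline 64-bit atomic load, so the i64 tests go through the __atomic_load_8 libcall and the select is then applied to the call's two result registers; RV64 keeps everything inline as ld plus a fence. A minimal reproducer for the RV64 side (a sketch; the llc invocation mirrors this file's RUN lines):

; llc -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload
define i64 @i64_acquire_sketch(ptr %p, i1 %c, i64 %other) nounwind {
  %v = load atomic i64, ptr %p acquire, align 8
  %r = select i1 %c, i64 %v, i64 %other
  ret i64 %r
}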
-define i64 @test_i8_s_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i8_s_store_64_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB16_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB16_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_store_64_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB16_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB16_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_store_64_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: beqz a1, .LBB16_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB16_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB16_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB16_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_store_64_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB16_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB16_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
- %ext = sext i8 %val to i64 ; sign-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i8_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i8_z_store_64_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB17_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB17_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_store_64_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB17_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB17_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_store_64_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: beqz a1, .LBB17_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB17_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB17_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB17_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_store_64_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB17_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB17_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB17_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
- %ext = zext i8 %val to i64 ; zero-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_s_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i16_s_store_64_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB18_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB18_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_store_64_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB18_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB18_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_store_64_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: beqz a1, .LBB18_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB18_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB18_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB18_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_store_64_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB18_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB18_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4 ; address of element 4 (base + 8 bytes)
- %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
- %ext = sext i16 %val to i64 ; sign-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i16_z_store_64_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB19_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB19_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_store_64_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB19_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB19_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_store_64_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: beqz a1, .LBB19_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB19_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB19_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB19_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_store_64_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB19_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB19_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB19_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4 ; address of element 4 (base + 8 bytes)
- %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
- %ext = zext i16 %val to i64 ; zero-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i32_z_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i32_z_store_64_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: mv a2, sp
-; RV32I-NEXT: li a3, 2
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: lw a0, 0(sp)
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: bnez s5, .LBB20_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: .LBB20_2: # %entry
-; RV32I-NEXT: addi a1, s5, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_z_store_64_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 2
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: lwu a0, 4(sp)
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB20_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: .LBB20_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_z_store_64_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: mv a2, sp
-; RV32I-SFB-NEXT: li a3, 2
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 0(sp)
-; RV32I-SFB-NEXT: beqz s5, .LBB20_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
-; RV32I-SFB-NEXT: .LBB20_2: # %entry
-; RV32I-SFB-NEXT: bnez s5, .LBB20_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: .LBB20_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_z_store_64_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 2
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s3, .LBB20_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: .LBB20_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: mv a2, sp
-; RV32I-SFBILOAD-NEXT: li a3, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB20_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
-; RV32I-SFBILOAD-NEXT: .LBB20_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB20_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: .LBB20_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB20_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: .LBB20_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i64 4 ; address of element 4 (base + 16 bytes)
- %val = load atomic i32, ptr %addr acquire, align 2 ; under-aligned 32-bit load: lowers to the __atomic_load libcall
- %ext = zext i32 %val to i64 ; zero-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i64_store_64_3(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i64_store_64_3:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: li a1, 2
-; RV32I-NEXT: call __atomic_load_8
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: bnez s5, .LBB21_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: .LBB21_2: # %entry
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i64_store_64_3:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: ld a0, 32(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB21_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB21_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i64_store_64_3:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 32
-; RV32I-SFB-NEXT: li a1, 2
-; RV32I-SFB-NEXT: call __atomic_load_8
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: bnez s5, .LBB21_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: .LBB21_2: # %entry
-; RV32I-SFB-NEXT: bnez s5, .LBB21_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: .LBB21_4: # %entry
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i64_store_64_3:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: ld a0, 32(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB21_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB21_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i64_store_64_3:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV32I-SFBILOAD-NEXT: li a1, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load_8
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: .LBB21_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: .LBB21_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i64_store_64_3:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB21_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i64, ptr %base, i64 4 ; address of element 4 (base + 32 bytes)
- %val = load atomic i64, ptr %addr acquire, align 8 ; load 64-bit value
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %val, i64 %b
- ret i64 %res
-}
-
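The second deleted file, below, repeats the same select-of-atomic-load patterns with monotonic ordering. A monotonic load needs no trailing fence r, rw, so its check lines differ from the acquire file essentially only by the missing fence; the loads themselves still stay ahead of the branch in every configuration. The IR shape (sketch, hypothetical name):

define i32 @monotonic_sketch(ptr %p, i1 %c, i32 %other) nounwind {
  %v = load atomic i8, ptr %p monotonic, align 1  ; no fence required
  %e = sext i8 %v to i32
  %r = select i1 %c, i32 %e, i32 %other
  ret i32 %r
}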
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
deleted file mode 100644
index 9d3606dca49a8..0000000000000
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll
+++ /dev/null
@@ -1,2091 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a | FileCheck %s --check-prefixes=RV32I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-ialu | \
-; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-ialu | \
-; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-iload | \
-; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload | \
-; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
-
-define i32 @test_i8_s_2(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i8_s_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB0_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB0_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB0_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB0_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB0_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB0_2: # %entry
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB0_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB0_2: # %entry
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB0_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB0_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
- %ext = sext i8 %val to i32 ; sign-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i8_z_2(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i8_z_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB1_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB1_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB1_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB1_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB1_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB1_2: # %entry
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB1_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB1_2: # %entry
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB1_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB1_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
- %ext = zext i8 %val to i32 ; zero-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_s_2(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i16_s_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB2_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB2_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB2_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB2_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB2_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB2_2: # %entry
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB2_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB2_2: # %entry
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB2_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB2_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4            ; compute base + 8 (element 4)
- %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
- %ext = sext i16 %val to i32 ; sign-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_z_2(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i16_z_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB3_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB3_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB3_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB3_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB3_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB3_2: # %entry
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB3_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB3_2: # %entry
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB3_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB3_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4            ; compute base + 8 (element 4)
- %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
- %ext = zext i16 %val to i32 ; zero-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i32_2(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i32_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lw a0, 16(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB4_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB4_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lw a0, 16(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB4_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB4_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB4_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB4_2: # %entry
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lw a0, 16(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB4_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB4_2: # %entry
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB4_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB4_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i32 4            ; compute base + 16 (element 4)
- %val = load atomic i32, ptr %addr monotonic, align 4 ; load 32-bit value
- %res = select i1 %x, i32 %val, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i8_s_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i8_s_store_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB5_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB5_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_store_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB5_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB5_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_store_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB5_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB5_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_store_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB5_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB5_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_store_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB5_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_store_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB5_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
- %ext = sext i8 %val to i32 ; sign-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i8_z_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i8_z_store_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB6_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB6_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_store_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB6_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB6_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_store_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB6_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB6_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_store_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB6_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB6_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_store_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_store_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB6_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
- %ext = zext i8 %val to i32 ; zero-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_s_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i16_s_store_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB7_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB7_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_store_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB7_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB7_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_store_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB7_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB7_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_store_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB7_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB7_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_store_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB7_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_store_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB7_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4            ; compute base + 8 (element 4)
- %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
- %ext = sext i16 %val to i32 ; sign-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_z_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i16_z_store_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB8_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB8_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_store_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB8_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB8_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_store_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB8_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB8_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_store_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB8_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB8_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_store_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_store_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB8_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4            ; compute base + 8 (element 4)
- %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
- %ext = zext i16 %val to i32 ; zero-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i32_store_2(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i32_store_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lw a0, 16(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB9_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB9_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_store_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lw a0, 16(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB9_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB9_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_store_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB9_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB9_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_store_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lw a0, 16(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB9_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB9_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_store_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_store_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i32 4            ; compute base + 16 (element 4)
- %val = load atomic i32, ptr %addr monotonic, align 4 ; load 32-bit value
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %val, i32 %b
- ret i32 %res
-}
-
-define i64 @test_i8_s_1_2(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i8_s_1_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB10_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB10_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_1_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB10_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB10_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_1_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB10_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB10_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB10_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB10_4: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_1_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB10_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB10_2: # %entry
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_1_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_1_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB10_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
- %ext = sext i8 %val to i64 ; sign-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i8_z_1_2(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i8_z_1_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB11_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB11_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_1_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB11_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB11_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_1_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB11_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB11_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB11_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB11_4: # %entry
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_1_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB11_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB11_2: # %entry
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_1_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB11_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB11_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_1_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB11_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
- %ext = zext i8 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_s_1_2(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i16_s_1_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB12_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB12_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_1_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB12_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB12_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_1_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB12_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB12_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB12_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB12_4: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_1_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB12_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB12_2: # %entry
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_1_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_1_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB12_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4            ; compute base + 8 (element 4)
- %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
- %ext = sext i16 %val to i64 ; sign-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_z_1_2(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i16_z_1_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB13_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB13_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_1_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB13_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB13_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_1_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB13_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB13_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB13_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB13_4: # %entry
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_1_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB13_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB13_2: # %entry
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_1_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB13_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB13_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_1_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB13_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4            ; compute base + 8 (element 4)
- %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
- %ext = zext i16 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i32_z_1_2(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i32_z_1_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: addi a2, sp, 12
-; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: beqz s2, .LBB14_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lw s1, 12(sp)
-; RV32I-NEXT: .LBB14_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_z_1_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 0
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: beqz s1, .LBB14_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: lwu s0, 4(sp)
-; RV64I-NEXT: .LBB14_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_z_1_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: addi a2, sp, 12
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 12(sp)
-; RV32I-SFB-NEXT: bnez s2, .LBB14_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: .LBB14_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB14_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li s0, 0
-; RV32I-SFB-NEXT: .LBB14_4: # %entry
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_z_1_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 0
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s1, .LBB14_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: .LBB14_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_z_1_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
-; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
-; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_z_1_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB14_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
-; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i64 4            ; compute base + 16 (element 4)
- %val = load atomic i32, ptr %addr monotonic, align 2   ; under-aligned 32-bit load; lowers to an __atomic_load libcall
- %ext = zext i32 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i64_1_2(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i64_1_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_8
-; RV32I-NEXT: bnez s2, .LBB15_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: .LBB15_2: # %entry
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i64_1_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: ld a0, 32(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB15_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB15_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i64_1_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 32
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_8
-; RV32I-SFB-NEXT: bnez s2, .LBB15_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: .LBB15_2: # %entry
-; RV32I-SFB-NEXT: bnez s2, .LBB15_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: .LBB15_4: # %entry
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i64_1_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: ld a0, 32(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB15_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB15_2: # %entry
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i64_1_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_8
-; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: .LBB15_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: .LBB15_4: # %entry
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i64_1_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB15_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i64, ptr %base, i64 4            ; compute base + 32 (element 4)
- %val = load atomic i64, ptr %addr monotonic, align 8 ; load 64-bit value
- %res = select i1 %x, i64 %val, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i8_s_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i8_s_store_64_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB16_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB16_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_store_64_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB16_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB16_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_store_64_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB16_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB16_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB16_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB16_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_store_64_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB16_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB16_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
- %ext = sext i8 %val to i64 ; sign-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i8_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i8_z_store_64_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB17_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB17_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_store_64_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB17_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB17_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_store_64_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB17_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB17_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB17_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB17_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_store_64_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB17_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB17_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB17_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
- %ext = zext i8 %val to i64 ; zero-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_s_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i16_s_store_64_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB18_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB18_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_store_64_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB18_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB18_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_store_64_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB18_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB18_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB18_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB18_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_store_64_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB18_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB18_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4            ; compute base + 8 (element 4)
- %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
- %ext = sext i16 %val to i64 ; sign-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i16_z_store_64_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB19_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB19_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_store_64_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB19_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB19_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_store_64_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB19_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB19_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB19_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB19_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_store_64_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB19_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB19_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB19_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4            ; compute base + 8 (element 4)
- %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
- %ext = zext i16 %val to i64 ; zero-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i32_z_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i32_z_store_64_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: mv a2, sp
-; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: lw a0, 0(sp)
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: bnez s5, .LBB20_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: .LBB20_2: # %entry
-; RV32I-NEXT: addi a1, s5, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_z_store_64_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 0
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: lwu a0, 4(sp)
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB20_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: .LBB20_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_z_store_64_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: mv a2, sp
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 0(sp)
-; RV32I-SFB-NEXT: beqz s5, .LBB20_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
-; RV32I-SFB-NEXT: .LBB20_2: # %entry
-; RV32I-SFB-NEXT: bnez s5, .LBB20_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: .LBB20_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_z_store_64_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 0
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s3, .LBB20_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: .LBB20_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: mv a2, sp
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB20_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
-; RV32I-SFBILOAD-NEXT: .LBB20_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB20_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: .LBB20_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB20_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: .LBB20_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i32, ptr %addr monotonic, align 2 ; load 32-bit value
- %ext = zext i32 %val to i64 ; zero-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i64_store_64_2(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i64_store_64_2:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_8
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: bnez s5, .LBB21_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: .LBB21_2: # %entry
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i64_store_64_2:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: ld a0, 32(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB21_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB21_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i64_store_64_2:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 32
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_8
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: bnez s5, .LBB21_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: .LBB21_2: # %entry
-; RV32I-SFB-NEXT: bnez s5, .LBB21_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: .LBB21_4: # %entry
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i64_store_64_2:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: ld a0, 32(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB21_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB21_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i64_store_64_2:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_8
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: .LBB21_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: .LBB21_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i64_store_64_2:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB21_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i64, ptr %addr monotonic, align 8 ; load 64-bit value
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %val, i64 %b
- ret i64 %res
-}
-
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
deleted file mode 100644
index f4aa40185ed9c..0000000000000
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll
+++ /dev/null
@@ -1,2319 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a | FileCheck %s --check-prefixes=RV32I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-ialu | \
-; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-ialu | \
-; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-iload | \
-; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload | \
-; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
-
-define i32 @test_i8_s_4(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i8_s_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB0_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB0_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB0_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB0_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB0_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB0_2: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB0_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB0_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB0_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB0_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
- %ext = sext i8 %val to i32 ; sign-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i8_z_4(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i8_z_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB1_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB1_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB1_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB1_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB1_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB1_2: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB1_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB1_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB1_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB1_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
- %ext = zext i8 %val to i32 ; zero-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_s_4(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i16_s_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB2_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB2_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB2_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB2_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB2_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB2_2: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB2_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB2_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB2_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB2_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
- %ext = sext i16 %val to i32 ; sign-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_z_4(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i16_z_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB3_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB3_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB3_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB3_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB3_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB3_2: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB3_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB3_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB3_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB3_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
- %ext = zext i16 %val to i32 ; zero-extend to 32 bits
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i32_4(ptr %base, i1 %x, i32 %b) nounwind {
-; RV32I-LABEL: test_i32_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lw a0, 16(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB4_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB4_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lw a0, 16(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB4_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB4_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB4_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB4_2: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lw a0, 16(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB4_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB4_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB4_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB4_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i32, ptr %addr seq_cst, align 4 ; load 32-bit value
- %res = select i1 %x, i32 %val, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i8_s_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i8_s_store_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB5_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB5_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_store_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB5_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB5_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_store_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: bnez a1, .LBB5_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB5_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_store_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB5_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB5_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_store_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB5_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_store_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB5_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
- %ext = sext i8 %val to i32 ; sign-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i8_z_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i8_z_store_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB6_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB6_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_store_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB6_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB6_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_store_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: bnez a1, .LBB6_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB6_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_store_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB6_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB6_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_store_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_store_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB6_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
- %ext = zext i8 %val to i32 ; zero-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_s_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i16_s_store_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB7_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB7_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_store_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB7_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB7_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_store_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: bnez a1, .LBB7_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB7_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_store_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB7_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB7_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_store_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB7_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_store_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB7_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
- %ext = sext i16 %val to i32 ; sign-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i16_z_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i16_z_store_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB8_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB8_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_store_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB8_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB8_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_store_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: bnez a1, .LBB8_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB8_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_store_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB8_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB8_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_store_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_store_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB8_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
- %ext = zext i16 %val to i32 ; zero-extend to 32 bits
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- ret i32 %res
-}
-
-define i32 @test_i32_store_4(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
-; RV32I-LABEL: test_i32_store_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lw a0, 16(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a4, 0(a3)
-; RV32I-NEXT: bnez a1, .LBB9_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB9_2: # %entry
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_store_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lw a0, 16(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sw a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB9_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB9_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_store_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: bnez a1, .LBB9_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB9_2: # %entry
-; RV32I-SFB-NEXT: sw a4, 0(a3)
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_store_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lw a0, 16(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB9_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB9_2: # %entry
-; RV64I-SFB-NEXT: sw a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_store_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_store_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4
- %val = load atomic i32, ptr %addr seq_cst, align 4 ; load 32-bit value
- store i32 %c, ptr %base1
- %res = select i1 %x, i32 %val, i32 %b
- ret i32 %res
-}
-
-define i64 @test_i8_s_1_4(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i8_s_1_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB10_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB10_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_1_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB10_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB10_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_1_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB10_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB10_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB10_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB10_4: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_1_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB10_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB10_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_1_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_1_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB10_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
- %ext = sext i8 %val to i64 ; sign-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i8_z_1_4(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i8_z_1_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB11_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB11_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_1_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB11_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB11_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_1_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB11_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB11_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB11_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB11_4: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_1_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB11_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB11_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_1_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB11_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB11_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_1_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB11_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
- %ext = zext i8 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_s_1_4(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i16_s_1_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB12_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB12_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_1_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB12_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB12_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_1_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB12_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB12_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB12_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB12_4: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_1_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB12_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB12_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_1_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_1_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB12_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
- %ext = sext i16 %val to i64 ; sign-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_z_1_4(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i16_z_1_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: bnez a1, .LBB13_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB13_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_1_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB13_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB13_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_1_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB13_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB13_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB13_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB13_4: # %entry
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_1_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB13_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB13_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_1_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB13_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB13_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_1_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB13_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
- %ext = zext i16 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i32_z_1_4(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i32_z_1_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: addi a2, sp, 12
-; RV32I-NEXT: li a3, 5
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: beqz s2, .LBB14_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lw s1, 12(sp)
-; RV32I-NEXT: .LBB14_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_z_1_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s1, a1, 1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 5
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: beqz s1, .LBB14_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: lwu s0, 4(sp)
-; RV64I-NEXT: .LBB14_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_z_1_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: addi a2, sp, 12
-; RV32I-SFB-NEXT: li a3, 5
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 12(sp)
-; RV32I-SFB-NEXT: bnez s2, .LBB14_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: .LBB14_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB14_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li s0, 0
-; RV32I-SFB-NEXT: .LBB14_4: # %entry
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_z_1_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: andi s1, a1, 1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 5
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s1, .LBB14_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: .LBB14_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_z_1_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
-; RV32I-SFBILOAD-NEXT: li a3, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
-; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
-; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_z_1_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: andi s1, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB14_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
-; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i32, ptr %addr seq_cst, align 2 ; load 32-bit value
- %ext = zext i32 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i64_1_4(ptr %base, i1 %x, i64 %b) nounwind {
-; RV32I-LABEL: test_i64_1_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s2, a1, 1
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_8
-; RV32I-NEXT: bnez s2, .LBB15_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: .LBB15_2: # %entry
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i64_1_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: ld a0, 32(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: bnez a1, .LBB15_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB15_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i64_1_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: andi s2, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 32
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_8
-; RV32I-SFB-NEXT: bnez s2, .LBB15_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: .LBB15_2: # %entry
-; RV32I-SFB-NEXT: bnez s2, .LBB15_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: .LBB15_4: # %entry
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i64_1_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: ld a0, 32(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB15_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB15_2: # %entry
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i64_1_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: andi s2, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_8
-; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: .LBB15_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s2, .LBB15_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: .LBB15_4: # %entry
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i64_1_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB15_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB15_2: # %entry
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i64, ptr %addr seq_cst, align 8 ; load 64-bit value
- %res = select i1 %x, i64 %val, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i8_s_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i8_s_store_64_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB16_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB16_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_store_64_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB16_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB16_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_store_64_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: beqz a1, .LBB16_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB16_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB16_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB16_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_store_64_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB16_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB16_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB16_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
- %ext = sext i8 %val to i64 ; sign-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i8_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i8_z_store_64_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB17_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB17_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_store_64_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB17_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB17_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_store_64_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: beqz a1, .LBB17_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB17_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB17_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB17_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_store_64_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB17_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB17_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB17_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB17_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB17_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i8, ptr %addr seq_cst, align 1 ; load 8-bit value
- %ext = zext i8 %val to i64 ; zero-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_s_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i16_s_store_64_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB18_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB18_2:
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_store_64_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB18_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB18_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_store_64_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: beqz a1, .LBB18_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB18_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB18_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB18_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_store_64_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB18_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB18_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB18_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
- %ext = sext i16 %val to i64 ; sign-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i16_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i16_z_store_64_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: fence rw, rw
-; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: fence r, rw
-; RV32I-NEXT: sw a5, 0(a4)
-; RV32I-NEXT: sw a6, 4(a4)
-; RV32I-NEXT: bnez a1, .LBB19_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: .LBB19_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_store_64_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB19_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB19_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_store_64_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: fence rw, rw
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: fence r, rw
-; RV32I-SFB-NEXT: beqz a1, .LBB19_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: .LBB19_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB19_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB19_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
-; RV32I-SFB-NEXT: sw a6, 4(a4)
-; RV32I-SFB-NEXT: mv a1, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_store_64_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB19_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB19_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: fence rw, rw
-; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: fence r, rw
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB19_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB19_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, a2
-; RV32I-SFBILOAD-NEXT: .LBB19_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
-; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB19_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i16, ptr %addr seq_cst, align 2 ; load 16-bit value
- %ext = zext i16 %val to i64 ; zero-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i32_z_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i32_z_store_64_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: mv a2, sp
-; RV32I-NEXT: li a3, 5
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: lw a0, 0(sp)
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: bnez s5, .LBB20_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: .LBB20_2: # %entry
-; RV32I-NEXT: addi a1, s5, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_z_store_64_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: andi s3, a1, 1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 5
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: lwu a0, 4(sp)
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB20_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: .LBB20_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_z_store_64_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: mv a2, sp
-; RV32I-SFB-NEXT: li a3, 5
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 0(sp)
-; RV32I-SFB-NEXT: beqz s5, .LBB20_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
-; RV32I-SFB-NEXT: .LBB20_2: # %entry
-; RV32I-SFB-NEXT: bnez s5, .LBB20_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: .LBB20_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_z_store_64_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: andi s3, a1, 1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 5
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s3, .LBB20_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: .LBB20_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: mv a2, sp
-; RV32I-SFBILOAD-NEXT: li a3, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB20_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
-; RV32I-SFBILOAD-NEXT: .LBB20_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB20_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: .LBB20_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: andi s3, a1, 1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB20_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: .LBB20_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i32, ptr %addr seq_cst, align 2 ; load 32-bit value
- %ext = zext i32 %val to i64 ; zero-extend to 64 bits
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %ext, i64 %b
- ret i64 %res
-}
-
-define i64 @test_i64_store_64_4(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
-; RV32I-LABEL: test_i64_store_64_4:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: andi s5, a1, 1
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: li a1, 5
-; RV32I-NEXT: call __atomic_load_8
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: bnez s5, .LBB21_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: .LBB21_2: # %entry
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i64_store_64_4:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: fence rw, rw
-; RV64I-NEXT: ld a0, 32(a0)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: fence r, rw
-; RV64I-NEXT: sd a4, 0(a3)
-; RV64I-NEXT: bnez a1, .LBB21_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: .LBB21_2: # %entry
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i64_store_64_4:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: andi s5, a1, 1
-; RV32I-SFB-NEXT: addi a0, a0, 32
-; RV32I-SFB-NEXT: li a1, 5
-; RV32I-SFB-NEXT: call __atomic_load_8
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: bnez s5, .LBB21_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: .LBB21_2: # %entry
-; RV32I-SFB-NEXT: bnez s5, .LBB21_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: .LBB21_4: # %entry
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i64_store_64_4:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: fence rw, rw
-; RV64I-SFB-NEXT: ld a0, 32(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: fence r, rw
-; RV64I-SFB-NEXT: bnez a1, .LBB21_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB21_2: # %entry
-; RV64I-SFB-NEXT: sd a4, 0(a3)
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i64_store_64_4:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: andi s5, a1, 1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV32I-SFBILOAD-NEXT: li a1, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load_8
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: .LBB21_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB21_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: .LBB21_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i64_store_64_4:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: fence rw, rw
-; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: fence r, rw
-; RV64I-SFBILOAD-NEXT: bnez a1, .LBB21_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, a2
-; RV64I-SFBILOAD-NEXT: .LBB21_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i64, ptr %addr seq_cst, align 8 ; load 64-bit value
- store i64 %c, ptr %base1
- %res = select i1 %x, i64 %val, i64 %b
- ret i64 %res
-}
-
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
deleted file mode 100644
index 90899c690516a..0000000000000
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll
+++ /dev/null
@@ -1,1022 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-ialu | \
-; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-ialu | \
-; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-iload | \
-; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
-; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
-
-define i32 @test_i8_s_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i8_s_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lb a4, 4(a0)
-; RV32I-NEXT: lw a0, 0(a3)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB0_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a4, a2
-; RV32I-NEXT: .LBB0_2: # %entry
-; RV32I-NEXT: add a0, a4, a0
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lb a4, 4(a0)
-; RV64I-NEXT: lw a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB0_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB0_2: # %entry
-; RV64I-NEXT: addw a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: lw a3, 0(a3)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB0_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB0_2: # %entry
-; RV32I-SFB-NEXT: add a0, a0, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: lw a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB0_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB0_2: # %entry
-; RV64I-SFB-NEXT: addw a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB0_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lb a2, 4(a0)
-; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB0_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
-; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
-; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load i8, ptr %addr ; load 8-bit value
- %ext = sext i8 %val to i32 ; sign-extend to 32 bits
- %val1 = load volatile i32, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- %res1 = add i32 %res, %val1
- ret i32 %res1
-}
-
-define i32 @test_i8_z_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i8_z_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lbu a4, 4(a0)
-; RV32I-NEXT: lw a0, 0(a3)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB1_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a4, a2
-; RV32I-NEXT: .LBB1_2: # %entry
-; RV32I-NEXT: add a0, a4, a0
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lbu a4, 4(a0)
-; RV64I-NEXT: lw a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB1_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB1_2: # %entry
-; RV64I-NEXT: addw a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: lw a3, 0(a3)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB1_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB1_2: # %entry
-; RV32I-SFB-NEXT: add a0, a0, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: lw a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB1_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB1_2: # %entry
-; RV64I-SFB-NEXT: addw a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB1_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
-; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB1_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
-; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
-; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
- %val = load i8, ptr %addr ; load 8-bit value
- %ext = zext i8 %val to i32 ; zero-extend to 32 bits
- %val1 = load volatile i32, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- %res1 = add i32 %res, %val1
- ret i32 %res1
-}
-
-define i32 @test_i16_s_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i16_s_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lh a4, 8(a0)
-; RV32I-NEXT: lw a0, 0(a3)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB2_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a4, a2
-; RV32I-NEXT: .LBB2_2: # %entry
-; RV32I-NEXT: add a0, a4, a0
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lh a4, 8(a0)
-; RV64I-NEXT: lw a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB2_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB2_2: # %entry
-; RV64I-NEXT: addw a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: lw a3, 0(a3)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB2_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB2_2: # %entry
-; RV32I-SFB-NEXT: add a0, a0, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: lw a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB2_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB2_2: # %entry
-; RV64I-SFB-NEXT: addw a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB2_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lh a2, 8(a0)
-; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB2_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
-; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
-; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
- %val = load i16, ptr %addr ; load 16-bit value
- %ext = sext i16 %val to i32 ; sign-extend to 32 bits
- %val1 = load volatile i32, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- %res1 = add i32 %res, %val1
- ret i32 %res1
-}
-
-define i32 @test_i16_z_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i16_z_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lhu a4, 8(a0)
-; RV32I-NEXT: lw a0, 0(a3)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB3_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a4, a2
-; RV32I-NEXT: .LBB3_2: # %entry
-; RV32I-NEXT: add a0, a4, a0
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lhu a4, 8(a0)
-; RV64I-NEXT: lw a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB3_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB3_2: # %entry
-; RV64I-NEXT: addw a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: lw a3, 0(a3)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB3_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB3_2: # %entry
-; RV32I-SFB-NEXT: add a0, a0, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: lw a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB3_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB3_2: # %entry
-; RV64I-SFB-NEXT: addw a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB3_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
-; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB3_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
-; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
-; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
- %val = load i16, ptr %addr ; load 16-bit value
- %ext = zext i16 %val to i32 ; zero-extend to 32 bits
- %val1 = load volatile i32, ptr %base1
- %res = select i1 %x, i32 %ext, i32 %b
- %res1 = add i32 %res, %val1
- ret i32 %res1
-}
-
-define i32 @test_i32_volatile(ptr %base, i1 %x, i32 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i32_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lw a4, 16(a0)
-; RV32I-NEXT: lw a0, 0(a3)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB4_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a4, a2
-; RV32I-NEXT: .LBB4_2: # %entry
-; RV32I-NEXT: add a0, a4, a0
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lw a4, 16(a0)
-; RV64I-NEXT: lw a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB4_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB4_2: # %entry
-; RV64I-NEXT: addw a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: lw a3, 0(a3)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB4_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
-; RV32I-SFB-NEXT: .LBB4_2: # %entry
-; RV32I-SFB-NEXT: add a0, a0, a3
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lw a0, 16(a0)
-; RV64I-SFB-NEXT: lw a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB4_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB4_2: # %entry
-; RV64I-SFB-NEXT: addw a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB4_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lw a2, 16(a0)
-; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a3
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB4_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lw a2, 16(a0)
-; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
-; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4
- %val = load i32, ptr %addr ; load 32-bit value
- %val1 = load volatile i32, ptr %base1
- %res = select i1 %x, i32 %val, i32 %b
- %res1 = add i32 %res, %val1
- ret i32 %res1
-}
-
-
-define i64 @test_i8_s_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i8_s_1_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lb a6, 4(a0)
-; RV32I-NEXT: lw a5, 4(a4)
-; RV32I-NEXT: lw a0, 0(a4)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB5_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a6, a2
-; RV32I-NEXT: j .LBB5_3
-; RV32I-NEXT: .LBB5_2:
-; RV32I-NEXT: srai a3, a6, 31
-; RV32I-NEXT: .LBB5_3: # %entry
-; RV32I-NEXT: add a0, a6, a0
-; RV32I-NEXT: sltu a1, a0, a6
-; RV32I-NEXT: add a3, a3, a5
-; RV32I-NEXT: add a1, a3, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_s_1_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lb a4, 4(a0)
-; RV64I-NEXT: ld a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB5_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB5_2: # %entry
-; RV64I-NEXT: add a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_s_1_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: lw a5, 4(a4)
-; RV32I-SFB-NEXT: lw a4, 0(a4)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB5_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB5_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB5_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB5_4: # %entry
-; RV32I-SFB-NEXT: add a0, a2, a4
-; RV32I-SFB-NEXT: sltu a1, a0, a2
-; RV32I-SFB-NEXT: add a3, a3, a5
-; RV32I-SFB-NEXT: add a1, a3, a1
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_s_1_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: ld a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB5_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB5_2: # %entry
-; RV64I-SFB-NEXT: add a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_s_1_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
-; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB5_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB5_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB5_4: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a4
-; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
-; RV32I-SFBILOAD-NEXT: add a3, a3, a5
-; RV32I-SFBILOAD-NEXT: add a1, a3, a1
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_s_1_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB5_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
-; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
-; RV64I-SFBILOAD-NEXT: add a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load i8, ptr %addr ; load 8-bit value
- %val1 = load volatile i64, ptr %base1
- %ext = sext i8 %val to i64 ; sign-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- %res1 = add i64 %res, %val1
- ret i64 %res1
-}
-
-define i64 @test_i8_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i8_z_1_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lbu a6, 4(a0)
-; RV32I-NEXT: lw a5, 4(a4)
-; RV32I-NEXT: lw a0, 0(a4)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB6_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a6, a2
-; RV32I-NEXT: .LBB6_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: add a0, a6, a0
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: sltu a2, a0, a6
-; RV32I-NEXT: add a1, a1, a5
-; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i8_z_1_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lbu a4, 4(a0)
-; RV64I-NEXT: ld a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB6_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB6_2: # %entry
-; RV64I-NEXT: add a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i8_z_1_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lbu a5, 4(a0)
-; RV32I-SFB-NEXT: lw a6, 4(a4)
-; RV32I-SFB-NEXT: lw a0, 0(a4)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB6_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a5, a2
-; RV32I-SFB-NEXT: .LBB6_2: # %entry
-; RV32I-SFB-NEXT: add a0, a5, a0
-; RV32I-SFB-NEXT: sltu a2, a0, a5
-; RV32I-SFB-NEXT: bnez a1, .LBB6_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: add a6, a6, a3
-; RV32I-SFB-NEXT: .LBB6_4: # %entry
-; RV32I-SFB-NEXT: add a1, a6, a2
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i8_z_1_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: ld a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB6_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB6_2: # %entry
-; RV64I-SFB-NEXT: add a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i8_z_1_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
-; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB6_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
-; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a4
-; RV32I-SFBILOAD-NEXT: sltu a2, a0, a2
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: add a5, a5, a3
-; RV32I-SFBILOAD-NEXT: .LBB6_4: # %entry
-; RV32I-SFBILOAD-NEXT: add a1, a5, a2
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i8_z_1_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB6_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
-; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
-; RV64I-SFBILOAD-NEXT: add a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
- %val = load i8, ptr %addr ; load 8-bit value
- %val1 = load volatile i64, ptr %base1
- %ext = zext i8 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- %res1 = add i64 %res, %val1
- ret i64 %res1
-}
-
-define i64 @test_i16_s_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i16_s_1_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lh a6, 8(a0)
-; RV32I-NEXT: lw a5, 4(a4)
-; RV32I-NEXT: lw a0, 0(a4)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB7_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a6, a2
-; RV32I-NEXT: j .LBB7_3
-; RV32I-NEXT: .LBB7_2:
-; RV32I-NEXT: srai a3, a6, 31
-; RV32I-NEXT: .LBB7_3: # %entry
-; RV32I-NEXT: add a0, a6, a0
-; RV32I-NEXT: sltu a1, a0, a6
-; RV32I-NEXT: add a3, a3, a5
-; RV32I-NEXT: add a1, a3, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_s_1_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lh a4, 8(a0)
-; RV64I-NEXT: ld a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB7_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB7_2: # %entry
-; RV64I-NEXT: add a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_s_1_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: lw a5, 4(a4)
-; RV32I-SFB-NEXT: lw a4, 0(a4)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: beqz a1, .LBB7_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai a3, a0, 31
-; RV32I-SFB-NEXT: .LBB7_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB7_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a2, a0
-; RV32I-SFB-NEXT: .LBB7_4: # %entry
-; RV32I-SFB-NEXT: add a0, a2, a4
-; RV32I-SFB-NEXT: sltu a1, a0, a2
-; RV32I-SFB-NEXT: add a3, a3, a5
-; RV32I-SFB-NEXT: add a1, a3, a1
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_s_1_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: ld a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB7_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB7_2: # %entry
-; RV64I-SFB-NEXT: add a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_s_1_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
-; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB7_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
-; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB7_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a2, a0
-; RV32I-SFBILOAD-NEXT: .LBB7_4: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a4
-; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
-; RV32I-SFBILOAD-NEXT: add a3, a3, a5
-; RV32I-SFBILOAD-NEXT: add a1, a3, a1
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_s_1_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB7_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
-; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
-; RV64I-SFBILOAD-NEXT: add a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
- %val = load i16, ptr %addr ; load 16-bit value
- %val1 = load volatile i64, ptr %base1
- %ext = sext i16 %val to i64 ; sign-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- %res1 = add i64 %res, %val1
- ret i64 %res1
-}
-
-define i64 @test_i16_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i16_z_1_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lhu a6, 8(a0)
-; RV32I-NEXT: lw a5, 4(a4)
-; RV32I-NEXT: lw a0, 0(a4)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB8_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a6, a2
-; RV32I-NEXT: .LBB8_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: add a0, a6, a0
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: sltu a2, a0, a6
-; RV32I-NEXT: add a1, a1, a5
-; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i16_z_1_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lhu a4, 8(a0)
-; RV64I-NEXT: ld a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB8_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB8_2: # %entry
-; RV64I-NEXT: add a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i16_z_1_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lhu a5, 8(a0)
-; RV32I-SFB-NEXT: lw a6, 4(a4)
-; RV32I-SFB-NEXT: lw a0, 0(a4)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB8_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a5, a2
-; RV32I-SFB-NEXT: .LBB8_2: # %entry
-; RV32I-SFB-NEXT: add a0, a5, a0
-; RV32I-SFB-NEXT: sltu a2, a0, a5
-; RV32I-SFB-NEXT: bnez a1, .LBB8_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: add a6, a6, a3
-; RV32I-SFB-NEXT: .LBB8_4: # %entry
-; RV32I-SFB-NEXT: add a1, a6, a2
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i16_z_1_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: ld a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB8_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB8_2: # %entry
-; RV64I-SFB-NEXT: add a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i16_z_1_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
-; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB8_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
-; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a4
-; RV32I-SFBILOAD-NEXT: sltu a2, a0, a2
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: add a5, a5, a3
-; RV32I-SFBILOAD-NEXT: .LBB8_4: # %entry
-; RV32I-SFBILOAD-NEXT: add a1, a5, a2
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i16_z_1_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB8_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
-; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
-; RV64I-SFBILOAD-NEXT: add a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
- %val = load i16, ptr %addr ; load 16-bit value
- %val1 = load volatile i64, ptr %base1
- %ext = zext i16 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- %res1 = add i64 %res, %val1
- ret i64 %res1
-}
-
-define i64 @test_i32_z_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i32_z_1_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lw a6, 16(a0)
-; RV32I-NEXT: lw a5, 4(a4)
-; RV32I-NEXT: lw a0, 0(a4)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB9_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a6, a2
-; RV32I-NEXT: .LBB9_2: # %entry
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: add a0, a6, a0
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: sltu a2, a0, a6
-; RV32I-NEXT: add a1, a1, a5
-; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i32_z_1_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lwu a4, 16(a0)
-; RV64I-NEXT: ld a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB9_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB9_2: # %entry
-; RV64I-NEXT: add a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i32_z_1_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lw a5, 16(a0)
-; RV32I-SFB-NEXT: lw a6, 4(a4)
-; RV32I-SFB-NEXT: lw a0, 0(a4)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB9_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a5, a2
-; RV32I-SFB-NEXT: .LBB9_2: # %entry
-; RV32I-SFB-NEXT: add a0, a5, a0
-; RV32I-SFB-NEXT: sltu a2, a0, a5
-; RV32I-SFB-NEXT: bnez a1, .LBB9_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: add a6, a6, a3
-; RV32I-SFB-NEXT: .LBB9_4: # %entry
-; RV32I-SFB-NEXT: add a1, a6, a2
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i32_z_1_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: lwu a0, 16(a0)
-; RV64I-SFB-NEXT: ld a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB9_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB9_2: # %entry
-; RV64I-SFB-NEXT: add a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i32_z_1_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
-; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB9_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lw a2, 16(a0)
-; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a4
-; RV32I-SFBILOAD-NEXT: sltu a2, a0, a2
-; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: add a5, a5, a3
-; RV32I-SFBILOAD-NEXT: .LBB9_4: # %entry
-; RV32I-SFBILOAD-NEXT: add a1, a5, a2
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i32_z_1_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB9_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lwu a2, 16(a0)
-; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
-; RV64I-SFBILOAD-NEXT: add a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
- %val = load i32, ptr %addr ; load 32-bit value
- %val1 = load volatile i64, ptr %base1
- %ext = zext i32 %val to i64 ; zero-extend to 64 bits
- %res = select i1 %x, i64 %ext, i64 %b
- %res1 = add i64 %res, %val1
- ret i64 %res1
-}
-
-define i64 @test_i64_1_volatile(ptr %base, i1 %x, i64 %b, ptr %base1) nounwind {
-; RV32I-LABEL: test_i64_1_volatile:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lw a7, 32(a0)
-; RV32I-NEXT: lw a6, 36(a0)
-; RV32I-NEXT: lw a5, 4(a4)
-; RV32I-NEXT: lw a0, 0(a4)
-; RV32I-NEXT: andi a1, a1, 1
-; RV32I-NEXT: bnez a1, .LBB10_2
-; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: mv a7, a2
-; RV32I-NEXT: .LBB10_2: # %entry
-; RV32I-NEXT: add a0, a7, a0
-; RV32I-NEXT: sltu a1, a0, a7
-; RV32I-NEXT: add a5, a6, a5
-; RV32I-NEXT: add a1, a5, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_i64_1_volatile:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: ld a4, 32(a0)
-; RV64I-NEXT: ld a0, 0(a3)
-; RV64I-NEXT: andi a1, a1, 1
-; RV64I-NEXT: bnez a1, .LBB10_2
-; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a4, a2
-; RV64I-NEXT: .LBB10_2: # %entry
-; RV64I-NEXT: add a0, a4, a0
-; RV64I-NEXT: ret
-;
-; RV32I-SFB-LABEL: test_i64_1_volatile:
-; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: lw a5, 32(a0)
-; RV32I-SFB-NEXT: lw a6, 36(a0)
-; RV32I-SFB-NEXT: lw a7, 4(a4)
-; RV32I-SFB-NEXT: lw a0, 0(a4)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB10_2
-; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a6, a3
-; RV32I-SFB-NEXT: .LBB10_2: # %entry
-; RV32I-SFB-NEXT: bnez a1, .LBB10_4
-; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a5, a2
-; RV32I-SFB-NEXT: .LBB10_4: # %entry
-; RV32I-SFB-NEXT: add a0, a5, a0
-; RV32I-SFB-NEXT: sltu a1, a0, a5
-; RV32I-SFB-NEXT: add a6, a6, a7
-; RV32I-SFB-NEXT: add a1, a6, a1
-; RV32I-SFB-NEXT: ret
-;
-; RV64I-SFB-LABEL: test_i64_1_volatile:
-; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: ld a0, 32(a0)
-; RV64I-SFB-NEXT: ld a3, 0(a3)
-; RV64I-SFB-NEXT: andi a1, a1, 1
-; RV64I-SFB-NEXT: bnez a1, .LBB10_2
-; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, a2
-; RV64I-SFB-NEXT: .LBB10_2: # %entry
-; RV64I-SFB-NEXT: add a0, a0, a3
-; RV64I-SFB-NEXT: ret
-;
-; RV32I-SFBILOAD-LABEL: test_i64_1_volatile:
-; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
-; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
-; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lw a2, 32(a0)
-; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
-; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: lw a3, 36(a0)
-; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
-; RV32I-SFBILOAD-NEXT: add a0, a2, a4
-; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
-; RV32I-SFBILOAD-NEXT: add a3, a3, a5
-; RV32I-SFBILOAD-NEXT: add a1, a3, a1
-; RV32I-SFBILOAD-NEXT: ret
-;
-; RV64I-SFBILOAD-LABEL: test_i64_1_volatile:
-; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
-; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
-; RV64I-SFBILOAD-NEXT: beqz a1, .LBB10_2
-; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: ld a2, 32(a0)
-; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
-; RV64I-SFBILOAD-NEXT: add a0, a2, a3
-; RV64I-SFBILOAD-NEXT: ret
-entry:
- %addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4
- %val = load i64, ptr %addr ; load 64-bit value
- %val1 = load volatile i64, ptr %base1
- %res = select i1 %x, i64 %val, i64 %b
- %res1 = add i64 %res, %val1
- ret i64 %res1
-}
-
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
index 984e101e8a937..faf4dd0c57c7f 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
@@ -10,10 +10,9 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
-define i32 @test_i8_s(ptr %base, i1 %x, i32 %b) nounwind {
+define i32 @test_i8_s(ptr %base, i1 zeroext %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_s:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB0_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lb a2, 4(a0)
@@ -23,7 +22,6 @@ define i32 @test_i8_s(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV64I-LABEL: test_i8_s:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB0_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lb a2, 4(a0)
@@ -34,7 +32,6 @@ define i32 @test_i8_s(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-SFB-LABEL: test_i8_s:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB0_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a0, a2
@@ -44,7 +41,6 @@ define i32 @test_i8_s(ptr %base, i1 %x, i32 %b) nounwind {
; RV64I-SFB-LABEL: test_i8_s:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB0_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -53,7 +49,6 @@ define i32 @test_i8_s(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV32I-SFBILOAD-LABEL: test_i8_s:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB0_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: lb a2, 4(a0)
@@ -63,7 +58,6 @@ define i32 @test_i8_s(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV64I-SFBILOAD-LABEL: test_i8_s:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB0_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
@@ -78,10 +72,9 @@ entry:
ret i32 %res
}
-define i32 @test_i8_z(ptr %base, i1 %x, i32 %b) nounwind {
+define i32 @test_i8_z(ptr %base, i1 zeroext %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_z:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB1_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lbu a2, 4(a0)
@@ -91,7 +84,6 @@ define i32 @test_i8_z(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV64I-LABEL: test_i8_z:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB1_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lbu a2, 4(a0)
@@ -102,7 +94,6 @@ define i32 @test_i8_z(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-SFB-LABEL: test_i8_z:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB1_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a0, a2
@@ -112,7 +103,6 @@ define i32 @test_i8_z(ptr %base, i1 %x, i32 %b) nounwind {
; RV64I-SFB-LABEL: test_i8_z:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB1_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -121,7 +111,6 @@ define i32 @test_i8_z(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV32I-SFBILOAD-LABEL: test_i8_z:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB1_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
@@ -131,7 +120,6 @@ define i32 @test_i8_z(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV64I-SFBILOAD-LABEL: test_i8_z:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB1_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
@@ -146,10 +134,9 @@ entry:
ret i32 %res
}
-define i32 @test_i16_s(ptr %base, i1 %x, i32 %b) nounwind {
+define i32 @test_i16_s(ptr %base, i1 zeroext %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_s:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB2_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lh a2, 8(a0)
@@ -159,7 +146,6 @@ define i32 @test_i16_s(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV64I-LABEL: test_i16_s:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB2_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lh a2, 8(a0)
@@ -170,7 +156,6 @@ define i32 @test_i16_s(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-SFB-LABEL: test_i16_s:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB2_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a0, a2
@@ -180,7 +165,6 @@ define i32 @test_i16_s(ptr %base, i1 %x, i32 %b) nounwind {
; RV64I-SFB-LABEL: test_i16_s:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB2_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -189,7 +173,6 @@ define i32 @test_i16_s(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV32I-SFBILOAD-LABEL: test_i16_s:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB2_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: lh a2, 8(a0)
@@ -199,7 +182,6 @@ define i32 @test_i16_s(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV64I-SFBILOAD-LABEL: test_i16_s:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB2_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
@@ -214,10 +196,9 @@ entry:
ret i32 %res
}
-define i32 @test_i16_z(ptr %base, i1 %x, i32 %b) nounwind {
+define i32 @test_i16_z(ptr %base, i1 zeroext %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_z:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lhu a2, 8(a0)
@@ -227,7 +208,6 @@ define i32 @test_i16_z(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV64I-LABEL: test_i16_z:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB3_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lhu a2, 8(a0)
@@ -238,7 +218,6 @@ define i32 @test_i16_z(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-SFB-LABEL: test_i16_z:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB3_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a0, a2
@@ -248,7 +227,6 @@ define i32 @test_i16_z(ptr %base, i1 %x, i32 %b) nounwind {
; RV64I-SFB-LABEL: test_i16_z:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB3_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -257,7 +235,6 @@ define i32 @test_i16_z(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV32I-SFBILOAD-LABEL: test_i16_z:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB3_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
@@ -267,7 +244,6 @@ define i32 @test_i16_z(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV64I-SFBILOAD-LABEL: test_i16_z:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB3_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
@@ -282,10 +258,9 @@ entry:
ret i32 %res
}
-define i32 @test_i32(ptr %base, i1 %x, i32 %b) nounwind {
+define i32 @test_i32(ptr %base, i1 zeroext %x, i32 %b) nounwind {
; RV32I-LABEL: test_i32:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB4_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lw a2, 16(a0)
@@ -295,7 +270,6 @@ define i32 @test_i32(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV64I-LABEL: test_i32:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB4_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lw a2, 16(a0)
@@ -306,7 +280,6 @@ define i32 @test_i32(ptr %base, i1 %x, i32 %b) nounwind {
; RV32I-SFB-LABEL: test_i32:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB4_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a0, a2
@@ -316,7 +289,6 @@ define i32 @test_i32(ptr %base, i1 %x, i32 %b) nounwind {
; RV64I-SFB-LABEL: test_i32:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lw a0, 16(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB4_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -325,7 +297,6 @@ define i32 @test_i32(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV32I-SFBILOAD-LABEL: test_i32:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB4_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: lw a2, 16(a0)
@@ -335,7 +306,6 @@ define i32 @test_i32(ptr %base, i1 %x, i32 %b) nounwind {
;
; RV64I-SFBILOAD-LABEL: test_i32:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB4_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: lw a2, 16(a0)
@@ -349,11 +319,10 @@ entry:
ret i32 %res
}
-define i32 @test_i8_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
+define i32 @test_i8_s_store(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_s_store:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a4, 0(a3)
; RV32I-NEXT: bnez a1, .LBB5_2
; RV32I-NEXT: # %bb.1: # %entry
@@ -364,7 +333,6 @@ define i32 @test_i8_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwi
; RV64I-LABEL: test_i8_s_store:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sw a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB5_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -375,7 +343,6 @@ define i32 @test_i8_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwi
; RV32I-SFB-LABEL: test_i8_s_store:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB5_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a0, a2
@@ -386,7 +353,6 @@ define i32 @test_i8_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwi
; RV64I-SFB-LABEL: test_i8_s_store:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB5_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -397,7 +363,6 @@ define i32 @test_i8_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwi
; RV32I-SFBILOAD-LABEL: test_i8_s_store:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: bnez a1, .LBB5_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
@@ -408,7 +373,6 @@ define i32 @test_i8_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwi
; RV64I-SFBILOAD-LABEL: test_i8_s_store:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB5_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -424,11 +388,10 @@ entry:
ret i32 %res
}
-define i32 @test_i8_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
+define i32 @test_i8_z_store(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_z_store:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a4, 0(a3)
; RV32I-NEXT: bnez a1, .LBB6_2
; RV32I-NEXT: # %bb.1: # %entry
@@ -439,7 +402,6 @@ define i32 @test_i8_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwi
; RV64I-LABEL: test_i8_z_store:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sw a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB6_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -450,7 +412,6 @@ define i32 @test_i8_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwi
; RV32I-SFB-LABEL: test_i8_z_store:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB6_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a0, a2
@@ -461,7 +422,6 @@ define i32 @test_i8_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwi
; RV64I-SFB-LABEL: test_i8_z_store:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB6_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -472,7 +432,6 @@ define i32 @test_i8_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwi
; RV32I-SFBILOAD-LABEL: test_i8_z_store:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
@@ -483,7 +442,6 @@ define i32 @test_i8_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwi
; RV64I-SFBILOAD-LABEL: test_i8_z_store:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB6_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -499,11 +457,10 @@ entry:
ret i32 %res
}
-define i32 @test_i16_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
+define i32 @test_i16_s_store(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_s_store:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a4, 0(a3)
; RV32I-NEXT: bnez a1, .LBB7_2
; RV32I-NEXT: # %bb.1: # %entry
@@ -514,7 +471,6 @@ define i32 @test_i16_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounw
; RV64I-LABEL: test_i16_s_store:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sw a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB7_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -525,7 +481,6 @@ define i32 @test_i16_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounw
; RV32I-SFB-LABEL: test_i16_s_store:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB7_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a0, a2
@@ -536,7 +491,6 @@ define i32 @test_i16_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounw
; RV64I-SFB-LABEL: test_i16_s_store:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB7_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -547,7 +501,6 @@ define i32 @test_i16_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounw
; RV32I-SFBILOAD-LABEL: test_i16_s_store:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: bnez a1, .LBB7_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
@@ -558,7 +511,6 @@ define i32 @test_i16_s_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounw
; RV64I-SFBILOAD-LABEL: test_i16_s_store:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB7_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -574,11 +526,10 @@ entry:
ret i32 %res
}
-define i32 @test_i16_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
+define i32 @test_i16_z_store(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_z_store:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a4, 0(a3)
; RV32I-NEXT: bnez a1, .LBB8_2
; RV32I-NEXT: # %bb.1: # %entry
@@ -589,7 +540,6 @@ define i32 @test_i16_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounw
; RV64I-LABEL: test_i16_z_store:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sw a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB8_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -600,7 +550,6 @@ define i32 @test_i16_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounw
; RV32I-SFB-LABEL: test_i16_z_store:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB8_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a0, a2
@@ -611,7 +560,6 @@ define i32 @test_i16_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounw
; RV64I-SFB-LABEL: test_i16_z_store:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB8_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -622,7 +570,6 @@ define i32 @test_i16_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounw
; RV32I-SFBILOAD-LABEL: test_i16_z_store:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
@@ -633,7 +580,6 @@ define i32 @test_i16_z_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounw
; RV64I-SFBILOAD-LABEL: test_i16_z_store:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB8_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -649,11 +595,10 @@ entry:
ret i32 %res
}
-define i32 @test_i32_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwind {
+define i32 @test_i32_store(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i32_store:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lw a0, 16(a0)
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a4, 0(a3)
; RV32I-NEXT: bnez a1, .LBB9_2
; RV32I-NEXT: # %bb.1: # %entry
@@ -664,7 +609,6 @@ define i32 @test_i32_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwin
; RV64I-LABEL: test_i32_store:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lw a0, 16(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sw a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB9_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -675,7 +619,6 @@ define i32 @test_i32_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwin
; RV32I-SFB-LABEL: test_i32_store:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB9_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a0, a2
@@ -686,7 +629,6 @@ define i32 @test_i32_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwin
; RV64I-SFB-LABEL: test_i32_store:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lw a0, 16(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB9_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -697,7 +639,6 @@ define i32 @test_i32_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwin
; RV32I-SFBILOAD-LABEL: test_i32_store:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
@@ -708,7 +649,6 @@ define i32 @test_i32_store(ptr %base, i1 %x, i32 %b, ptr %base1, i32 %c) nounwin
; RV64I-SFBILOAD-LABEL: test_i32_store:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -723,10 +663,9 @@ entry:
ret i32 %res
}
-define i64 @test_i8_s_1(ptr %base, i1 %x, i64 %b) nounwind {
+define i64 @test_i8_s_1(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_s_1:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB10_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lb a2, 4(a0)
@@ -738,7 +677,6 @@ define i64 @test_i8_s_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV64I-LABEL: test_i8_s_1:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB10_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lb a2, 4(a0)
@@ -749,7 +687,6 @@ define i64 @test_i8_s_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-SFB-LABEL: test_i8_s_1:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: beqz a1, .LBB10_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a2, a0
@@ -765,7 +702,6 @@ define i64 @test_i8_s_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV64I-SFB-LABEL: test_i8_s_1:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB10_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -775,7 +711,6 @@ define i64 @test_i8_s_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-SFBILOAD-LABEL: test_i8_s_1:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a2, a0
@@ -790,7 +725,6 @@ define i64 @test_i8_s_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV64I-SFBILOAD-LABEL: test_i8_s_1:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB10_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
@@ -805,10 +739,9 @@ entry:
ret i64 %res
}
-define i64 @test_i8_z_1(ptr %base, i1 %x, i64 %b) nounwind {
+define i64 @test_i8_z_1(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_z_1:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB11_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lbu a2, 4(a0)
@@ -820,7 +753,6 @@ define i64 @test_i8_z_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV64I-LABEL: test_i8_z_1:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB11_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lbu a2, 4(a0)
@@ -831,14 +763,13 @@ define i64 @test_i8_z_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-SFB-LABEL: test_i8_z_1:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB11_2
+; RV32I-SFB-NEXT: beqz a1, .LBB11_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB11_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB11_4
+; RV32I-SFB-NEXT: bnez a1, .LBB11_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB11_4: # %entry
; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
@@ -846,7 +777,6 @@ define i64 @test_i8_z_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV64I-SFB-LABEL: test_i8_z_1:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB11_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -855,22 +785,20 @@ define i64 @test_i8_z_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV32I-SFBILOAD-LABEL: test_i8_z_1:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a4, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a4, .LBB11_2
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB11_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: beqz a4, .LBB11_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB11_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_1:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB11_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
@@ -885,10 +813,9 @@ entry:
ret i64 %res
}
-define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) nounwind {
+define i64 @test_i16_s_1(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_s_1:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB12_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lh a2, 8(a0)
@@ -900,7 +827,6 @@ define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV64I-LABEL: test_i16_s_1:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB12_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lh a2, 8(a0)
@@ -911,7 +837,6 @@ define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-SFB-LABEL: test_i16_s_1:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: beqz a1, .LBB12_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a2, a0
@@ -927,7 +852,6 @@ define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV64I-SFB-LABEL: test_i16_s_1:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB12_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -937,7 +861,6 @@ define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-SFBILOAD-LABEL: test_i16_s_1:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a2, a0
@@ -952,7 +875,6 @@ define i64 @test_i16_s_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV64I-SFBILOAD-LABEL: test_i16_s_1:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB12_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
@@ -967,10 +889,9 @@ entry:
ret i64 %res
}
-define i64 @test_i16_z_1(ptr %base, i1 %x, i64 %b) nounwind {
+define i64 @test_i16_z_1(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_z_1:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB13_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lhu a2, 8(a0)
@@ -982,7 +903,6 @@ define i64 @test_i16_z_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV64I-LABEL: test_i16_z_1:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB13_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lhu a2, 8(a0)
@@ -993,14 +913,13 @@ define i64 @test_i16_z_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-SFB-LABEL: test_i16_z_1:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB13_2
+; RV32I-SFB-NEXT: beqz a1, .LBB13_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB13_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB13_4
+; RV32I-SFB-NEXT: bnez a1, .LBB13_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB13_4: # %entry
; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
@@ -1008,7 +927,6 @@ define i64 @test_i16_z_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV64I-SFB-LABEL: test_i16_z_1:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB13_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -1017,22 +935,20 @@ define i64 @test_i16_z_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV32I-SFBILOAD-LABEL: test_i16_z_1:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a4, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a4, .LBB13_2
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB13_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: beqz a4, .LBB13_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB13_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_1:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB13_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
@@ -1047,10 +963,9 @@ entry:
ret i64 %res
}
-define i64 @test_i32_z_1(ptr %base, i1 %x, i64 %b) nounwind {
+define i64 @test_i32_z_1(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i32_z_1:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB14_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lw a2, 16(a0)
@@ -1062,7 +977,6 @@ define i64 @test_i32_z_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV64I-LABEL: test_i32_z_1:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB14_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lwu a2, 16(a0)
@@ -1073,14 +987,13 @@ define i64 @test_i32_z_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-SFB-LABEL: test_i32_z_1:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
-; RV32I-SFB-NEXT: bnez a1, .LBB14_2
+; RV32I-SFB-NEXT: beqz a1, .LBB14_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB14_2: # %entry
-; RV32I-SFB-NEXT: beqz a1, .LBB14_4
+; RV32I-SFB-NEXT: bnez a1, .LBB14_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB14_4: # %entry
; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
@@ -1088,7 +1001,6 @@ define i64 @test_i32_z_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV64I-SFB-LABEL: test_i32_z_1:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lwu a0, 16(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB14_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -1097,22 +1009,20 @@ define i64 @test_i32_z_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV32I-SFBILOAD-LABEL: test_i32_z_1:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a4, a1, 1
-; RV32I-SFBILOAD-NEXT: beqz a4, .LBB14_2
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB14_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: lw a2, 16(a0)
; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a1, a3
-; RV32I-SFBILOAD-NEXT: beqz a4, .LBB14_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB14_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_1:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB14_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: lwu a2, 16(a0)
@@ -1127,10 +1037,9 @@ entry:
ret i64 %res
}
-define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) nounwind {
+define i64 @test_i64_1(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i64_1:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: beqz a1, .LBB15_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lw a2, 32(a0)
@@ -1142,7 +1051,6 @@ define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV64I-LABEL: test_i64_1:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: beqz a1, .LBB15_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: ld a2, 32(a0)
@@ -1154,7 +1062,6 @@ define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lw a4, 32(a0)
; RV32I-SFB-NEXT: lw a5, 36(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB15_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a4, a2
@@ -1170,7 +1077,6 @@ define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) nounwind {
; RV64I-SFB-LABEL: test_i64_1:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: ld a0, 32(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB15_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -1179,7 +1085,6 @@ define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV32I-SFBILOAD-LABEL: test_i64_1:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB15_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: lw a2, 32(a0)
@@ -1194,7 +1099,6 @@ define i64 @test_i64_1(ptr %base, i1 %x, i64 %b) nounwind {
;
; RV64I-SFBILOAD-LABEL: test_i64_1:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: beqz a1, .LBB15_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: ld a2, 32(a0)
@@ -1208,11 +1112,10 @@ entry:
ret i64 %res
}
-define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
+define i64 @test_i8_s_store_64(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_s_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lb a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
; RV32I-NEXT: bnez a1, .LBB16_2
@@ -1227,7 +1130,6 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV64I-LABEL: test_i8_s_store_64:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lb a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB16_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -1238,7 +1140,7 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV32I-SFB-LABEL: test_i8_s_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lb a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: sw a5, 0(a4)
; RV32I-SFB-NEXT: beqz a1, .LBB16_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a2, a0
@@ -1247,7 +1149,6 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB16_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: mv a1, a3
@@ -1256,7 +1157,6 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV64I-SFB-LABEL: test_i8_s_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lb a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB16_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -1267,7 +1167,7 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV32I-SFBILOAD-LABEL: test_i8_s_store_64:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB16_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a2, a0
@@ -1276,7 +1176,6 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB16_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: mv a1, a3
@@ -1285,7 +1184,6 @@ define i64 @test_i8_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV64I-SFBILOAD-LABEL: test_i8_s_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB16_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -1301,11 +1199,10 @@ entry:
ret i64 %res
}
-define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
+define i64 @test_i8_z_store_64(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_z_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lbu a0, 4(a0)
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
; RV32I-NEXT: bnez a1, .LBB17_2
@@ -1319,7 +1216,6 @@ define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV64I-LABEL: test_i8_z_store_64:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lbu a0, 4(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB17_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -1330,7 +1226,6 @@ define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV32I-SFB-LABEL: test_i8_z_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lbu a0, 4(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: beqz a1, .LBB17_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: li a3, 0
@@ -1347,7 +1242,6 @@ define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV64I-SFB-LABEL: test_i8_z_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lbu a0, 4(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB17_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -1358,7 +1252,6 @@ define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV32I-SFBILOAD-LABEL: test_i8_z_store_64:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB17_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: li a3, 0
@@ -1375,7 +1268,6 @@ define i64 @test_i8_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nou
; RV64I-SFBILOAD-LABEL: test_i8_z_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB17_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -1391,11 +1283,10 @@ entry:
ret i64 %res
}
-define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
+define i64 @test_i16_s_store_64(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_s_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lh a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
; RV32I-NEXT: bnez a1, .LBB18_2
@@ -1410,7 +1301,6 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV64I-LABEL: test_i16_s_store_64:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lh a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB18_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -1421,7 +1311,7 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV32I-SFB-LABEL: test_i16_s_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lh a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: sw a5, 0(a4)
; RV32I-SFB-NEXT: beqz a1, .LBB18_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a2, a0
@@ -1430,7 +1320,6 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV32I-SFB-NEXT: # %bb.3: # %entry
; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB18_4: # %entry
-; RV32I-SFB-NEXT: sw a5, 0(a4)
; RV32I-SFB-NEXT: sw a6, 4(a4)
; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: mv a1, a3
@@ -1439,7 +1328,6 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV64I-SFB-LABEL: test_i16_s_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lh a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB18_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -1450,7 +1338,7 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV32I-SFBILOAD-LABEL: test_i16_s_store_64:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB18_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a2, a0
@@ -1459,7 +1347,6 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB18_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: mv a1, a3
@@ -1468,7 +1355,6 @@ define i64 @test_i16_s_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV64I-SFBILOAD-LABEL: test_i16_s_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB18_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -1484,11 +1370,10 @@ entry:
ret i64 %res
}
-define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
+define i64 @test_i16_z_store_64(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_z_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lhu a0, 8(a0)
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
; RV32I-NEXT: bnez a1, .LBB19_2
@@ -1502,7 +1387,6 @@ define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV64I-LABEL: test_i16_z_store_64:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lhu a0, 8(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB19_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -1513,7 +1397,6 @@ define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV32I-SFB-LABEL: test_i16_z_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lhu a0, 8(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: beqz a1, .LBB19_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: li a3, 0
@@ -1530,7 +1413,6 @@ define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV64I-SFB-LABEL: test_i16_z_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lhu a0, 8(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB19_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -1541,7 +1423,6 @@ define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV32I-SFBILOAD-LABEL: test_i16_z_store_64:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB19_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: li a3, 0
@@ -1558,7 +1439,6 @@ define i64 @test_i16_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV64I-SFBILOAD-LABEL: test_i16_z_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB19_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -1574,11 +1454,10 @@ entry:
ret i64 %res
}
-define i64 @test_i32_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
+define i64 @test_i32_z_store_64(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i32_z_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lw a0, 16(a0)
-; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
; RV32I-NEXT: bnez a1, .LBB20_2
@@ -1592,7 +1471,6 @@ define i64 @test_i32_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV64I-LABEL: test_i32_z_store_64:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lwu a0, 16(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB20_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -1603,7 +1481,6 @@ define i64 @test_i32_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV32I-SFB-LABEL: test_i32_z_store_64:
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lw a0, 16(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: beqz a1, .LBB20_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: li a3, 0
@@ -1620,7 +1497,6 @@ define i64 @test_i32_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV64I-SFB-LABEL: test_i32_z_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: lwu a0, 16(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB20_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -1631,7 +1507,6 @@ define i64 @test_i32_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV32I-SFBILOAD-LABEL: test_i32_z_store_64:
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: beqz a1, .LBB20_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: li a3, 0
@@ -1648,7 +1523,6 @@ define i64 @test_i32_z_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) no
; RV64I-SFBILOAD-LABEL: test_i32_z_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: lwu a0, 16(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB20_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -1664,14 +1538,13 @@ entry:
ret i64 %res
}
-define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) nounwind {
+define i64 @test_i64_store_64(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i64_store_64:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: mv a7, a1
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: lw a0, 32(a0)
; RV32I-NEXT: lw a1, 36(a1)
-; RV32I-NEXT: andi a7, a7, 1
; RV32I-NEXT: sw a5, 0(a4)
; RV32I-NEXT: sw a6, 4(a4)
; RV32I-NEXT: bnez a7, .LBB21_2
@@ -1684,7 +1557,6 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) noun
; RV64I-LABEL: test_i64_store_64:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: ld a0, 32(a0)
-; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: sd a4, 0(a3)
; RV64I-NEXT: bnez a1, .LBB21_2
; RV64I-NEXT: # %bb.1: # %entry
@@ -1696,7 +1568,6 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) noun
; RV32I-SFB: # %bb.0: # %entry
; RV32I-SFB-NEXT: lw a7, 32(a0)
; RV32I-SFB-NEXT: lw t0, 36(a0)
-; RV32I-SFB-NEXT: andi a1, a1, 1
; RV32I-SFB-NEXT: bnez a1, .LBB21_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
; RV32I-SFB-NEXT: mv a7, a2
@@ -1714,7 +1585,6 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) noun
; RV64I-SFB-LABEL: test_i64_store_64:
; RV64I-SFB: # %bb.0: # %entry
; RV64I-SFB-NEXT: ld a0, 32(a0)
-; RV64I-SFB-NEXT: andi a1, a1, 1
; RV64I-SFB-NEXT: bnez a1, .LBB21_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
; RV64I-SFB-NEXT: mv a0, a2
@@ -1726,7 +1596,6 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) noun
; RV32I-SFBILOAD: # %bb.0: # %entry
; RV32I-SFBILOAD-NEXT: lw a7, 32(a0)
; RV32I-SFBILOAD-NEXT: lw t0, 36(a0)
-; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
; RV32I-SFBILOAD-NEXT: bnez a1, .LBB21_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV32I-SFBILOAD-NEXT: mv a7, a2
@@ -1744,7 +1613,6 @@ define i64 @test_i64_store_64(ptr %base, i1 %x, i64 %b, ptr %base1, i64 %c) noun
; RV64I-SFBILOAD-LABEL: test_i64_store_64:
; RV64I-SFBILOAD: # %bb.0: # %entry
; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
-; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
; RV64I-SFBILOAD-NEXT: bnez a1, .LBB21_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
; RV64I-SFBILOAD-NEXT: mv a0, a2
@@ -1759,3 +1627,4886 @@ entry:
ret i64 %res
}
+define i32 @test_i8_s_volatile(ptr %base, i1 zeroext %x, i32 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i8_s_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a4, 4(a0)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB22_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a4, a2
+; RV32I-NEXT: .LBB22_2: # %entry
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a4, 4(a0)
+; RV64I-NEXT: lw a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB22_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB22_2: # %entry
+; RV64I-NEXT: addw a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: lw a3, 0(a3)
+; RV32I-SFB-NEXT: bnez a1, .LBB22_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB22_2: # %entry
+; RV32I-SFB-NEXT: add a0, a0, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: lw a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB22_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB22_2: # %entry
+; RV64I-SFB-NEXT: addw a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB22_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lb a2, 4(a0)
+; RV32I-SFBILOAD-NEXT: .LBB22_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB22_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB22_2: # %entry
+; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %val1 = load volatile i32, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ %res1 = add i32 %res, %val1
+ ret i32 %res1
+}
+
+define i32 @test_i8_z_volatile(ptr %base, i1 zeroext %x, i32 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i8_z_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a4, 4(a0)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB23_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a4, a2
+; RV32I-NEXT: .LBB23_2: # %entry
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a4, 4(a0)
+; RV64I-NEXT: lw a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB23_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB23_2: # %entry
+; RV64I-NEXT: addw a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: lw a3, 0(a3)
+; RV32I-SFB-NEXT: bnez a1, .LBB23_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB23_2: # %entry
+; RV32I-SFB-NEXT: add a0, a0, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: lw a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB23_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB23_2: # %entry
+; RV64I-SFB-NEXT: addw a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB23_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV32I-SFBILOAD-NEXT: .LBB23_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB23_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB23_2: # %entry
+; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %val1 = load volatile i32, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ %res1 = add i32 %res, %val1
+ ret i32 %res1
+}
+
+define i32 @test_i16_s_volatile(ptr %base, i1 zeroext %x, i32 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i16_s_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a4, 8(a0)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB24_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a4, a2
+; RV32I-NEXT: .LBB24_2: # %entry
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a4, 8(a0)
+; RV64I-NEXT: lw a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB24_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB24_2: # %entry
+; RV64I-NEXT: addw a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: lw a3, 0(a3)
+; RV32I-SFB-NEXT: bnez a1, .LBB24_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB24_2: # %entry
+; RV32I-SFB-NEXT: add a0, a0, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: lw a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB24_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB24_2: # %entry
+; RV64I-SFB-NEXT: addw a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB24_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lh a2, 8(a0)
+; RV32I-SFBILOAD-NEXT: .LBB24_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB24_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB24_2: # %entry
+; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4      ; compute base + 4 i16 elements (byte offset 8)
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %val1 = load volatile i32, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ %res1 = add i32 %res, %val1
+ ret i32 %res1
+}
+
+define i32 @test_i16_z_volatile(ptr %base, i1 zeroext %x, i32 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i16_z_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a4, 8(a0)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB25_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a4, a2
+; RV32I-NEXT: .LBB25_2: # %entry
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a4, 8(a0)
+; RV64I-NEXT: lw a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB25_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB25_2: # %entry
+; RV64I-NEXT: addw a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: lw a3, 0(a3)
+; RV32I-SFB-NEXT: bnez a1, .LBB25_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB25_2: # %entry
+; RV32I-SFB-NEXT: add a0, a0, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: lw a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB25_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB25_2: # %entry
+; RV64I-SFB-NEXT: addw a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB25_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV32I-SFBILOAD-NEXT: .LBB25_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB25_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB25_2: # %entry
+; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4      ; compute base + 4 i16 elements (byte offset 8)
+ %val = load i16, ptr %addr ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %val1 = load volatile i32, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ %res1 = add i32 %res, %val1
+ ret i32 %res1
+}
+
+define i32 @test_i32_volatile(ptr %base, i1 zeroext %x, i32 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i32_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a4, 16(a0)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB26_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a4, a2
+; RV32I-NEXT: .LBB26_2: # %entry
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lw a4, 16(a0)
+; RV64I-NEXT: lw a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB26_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB26_2: # %entry
+; RV64I-NEXT: addw a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: lw a3, 0(a3)
+; RV32I-SFB-NEXT: bnez a1, .LBB26_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB26_2: # %entry
+; RV32I-SFB-NEXT: add a0, a0, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: lw a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB26_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB26_2: # %entry
+; RV64I-SFB-NEXT: addw a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB26_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw a2, 16(a0)
+; RV32I-SFBILOAD-NEXT: .LBB26_2: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lw a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB26_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lw a2, 16(a0)
+; RV64I-SFBILOAD-NEXT: .LBB26_2: # %entry
+; RV64I-SFBILOAD-NEXT: addw a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i32 4      ; compute base + 4 i32 elements (byte offset 16)
+ %val = load i32, ptr %addr ; load 32-bit value
+ %val1 = load volatile i32, ptr %base1
+ %res = select i1 %x, i32 %val, i32 %b
+ %res1 = add i32 %res, %val1
+ ret i32 %res1
+}
+
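+; The _1_ variants return i64. On RV32 the i64 select is split into two
+; predicated operations, so the SFB and SFBILOAD outputs use two short forward
+; branches, one per half of the result.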
+define i64 @test_i8_s_1_volatile(ptr %base, i1 zeroext %x, i64 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i8_s_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a6, 4(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: bnez a1, .LBB27_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: j .LBB27_3
+; RV32I-NEXT: .LBB27_2:
+; RV32I-NEXT: srai a3, a6, 31
+; RV32I-NEXT: .LBB27_3: # %entry
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: sltu a1, a0, a6
+; RV32I-NEXT: add a3, a3, a5
+; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a4, 4(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB27_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB27_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: lw a5, 4(a4)
+; RV32I-SFB-NEXT: lw a4, 0(a4)
+; RV32I-SFB-NEXT: beqz a1, .LBB27_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB27_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB27_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB27_4: # %entry
+; RV32I-SFB-NEXT: add a0, a2, a4
+; RV32I-SFB-NEXT: sltu a1, a0, a2
+; RV32I-SFB-NEXT: add a3, a3, a5
+; RV32I-SFB-NEXT: add a1, a3, a1
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB27_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB27_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB27_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB27_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB27_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB27_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
+; RV32I-SFBILOAD-NEXT: add a3, a3, a5
+; RV32I-SFBILOAD-NEXT: add a1, a3, a1
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB27_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lb a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB27_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %val1 = load volatile i64, ptr %base1
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
+define i64 @test_i8_z_1_volatile(ptr %base, i1 zeroext %x, i64 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i8_z_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a6, 4(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: bnez a1, .LBB28_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: .LBB28_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: sltu a2, a0, a6
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a4, 4(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB28_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB28_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a5, 4(a0)
+; RV32I-SFB-NEXT: lw a6, 4(a4)
+; RV32I-SFB-NEXT: lw a0, 0(a4)
+; RV32I-SFB-NEXT: bnez a1, .LBB28_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a5, a2
+; RV32I-SFB-NEXT: .LBB28_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB28_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: add a6, a6, a3
+; RV32I-SFB-NEXT: .LBB28_4: # %entry
+; RV32I-SFB-NEXT: add a0, a5, a0
+; RV32I-SFB-NEXT: sltu a1, a0, a5
+; RV32I-SFB-NEXT: add a1, a6, a1
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB28_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB28_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB28_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV32I-SFBILOAD-NEXT: .LBB28_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB28_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: add a5, a5, a3
+; RV32I-SFBILOAD-NEXT: .LBB28_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
+; RV32I-SFBILOAD-NEXT: add a1, a5, a1
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB28_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a2, 4(a0)
+; RV64I-SFBILOAD-NEXT: .LBB28_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load i8, ptr %addr ; load 8-bit value
+ %val1 = load volatile i64, ptr %base1
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
+define i64 @test_i16_s_1_volatile(ptr %base, i1 zeroext %x, i64 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i16_s_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a6, 8(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: bnez a1, .LBB29_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: j .LBB29_3
+; RV32I-NEXT: .LBB29_2:
+; RV32I-NEXT: srai a3, a6, 31
+; RV32I-NEXT: .LBB29_3: # %entry
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: sltu a1, a0, a6
+; RV32I-NEXT: add a3, a3, a5
+; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a4, 8(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB29_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB29_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: lw a5, 4(a4)
+; RV32I-SFB-NEXT: lw a4, 0(a4)
+; RV32I-SFB-NEXT: beqz a1, .LBB29_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB29_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB29_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB29_4: # %entry
+; RV32I-SFB-NEXT: add a0, a2, a4
+; RV32I-SFB-NEXT: sltu a1, a0, a2
+; RV32I-SFB-NEXT: add a3, a3, a5
+; RV32I-SFB-NEXT: add a1, a3, a1
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB29_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB29_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB29_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB29_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB29_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB29_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
+; RV32I-SFBILOAD-NEXT: add a3, a3, a5
+; RV32I-SFBILOAD-NEXT: add a1, a3, a1
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB29_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lh a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB29_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4      ; compute base + 4 i16 elements (byte offset 8)
+ %val = load i16, ptr %addr ; load 16-bit value
+ %val1 = load volatile i64, ptr %base1
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
+define i64 @test_i16_z_1_volatile(ptr %base, i1 zeroext %x, i64 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i16_z_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a6, 8(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: bnez a1, .LBB30_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: .LBB30_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: sltu a2, a0, a6
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a4, 8(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB30_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB30_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a5, 8(a0)
+; RV32I-SFB-NEXT: lw a6, 4(a4)
+; RV32I-SFB-NEXT: lw a0, 0(a4)
+; RV32I-SFB-NEXT: bnez a1, .LBB30_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a5, a2
+; RV32I-SFB-NEXT: .LBB30_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB30_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: add a6, a6, a3
+; RV32I-SFB-NEXT: .LBB30_4: # %entry
+; RV32I-SFB-NEXT: add a0, a5, a0
+; RV32I-SFB-NEXT: sltu a1, a0, a5
+; RV32I-SFB-NEXT: add a1, a6, a1
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB30_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB30_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB30_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV32I-SFBILOAD-NEXT: .LBB30_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB30_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: add a5, a5, a3
+; RV32I-SFBILOAD-NEXT: .LBB30_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
+; RV32I-SFBILOAD-NEXT: add a1, a5, a1
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB30_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a2, 8(a0)
+; RV64I-SFBILOAD-NEXT: .LBB30_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4      ; compute base + 4 i16 elements (byte offset 8)
+ %val = load i16, ptr %addr ; load 16-bit value
+ %val1 = load volatile i64, ptr %base1
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
+define i64 @test_i32_z_1_volatile(ptr %base, i1 zeroext %x, i64 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i32_z_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a6, 16(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: bnez a1, .LBB31_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: .LBB31_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: sltu a2, a0, a6
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lwu a4, 16(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB31_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB31_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a5, 16(a0)
+; RV32I-SFB-NEXT: lw a6, 4(a4)
+; RV32I-SFB-NEXT: lw a0, 0(a4)
+; RV32I-SFB-NEXT: bnez a1, .LBB31_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a5, a2
+; RV32I-SFB-NEXT: .LBB31_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB31_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: add a6, a6, a3
+; RV32I-SFB-NEXT: .LBB31_4: # %entry
+; RV32I-SFB-NEXT: add a0, a5, a0
+; RV32I-SFB-NEXT: sltu a1, a0, a5
+; RV32I-SFB-NEXT: add a1, a6, a1
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lwu a0, 16(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB31_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB31_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB31_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw a2, 16(a0)
+; RV32I-SFBILOAD-NEXT: .LBB31_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB31_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: add a5, a5, a3
+; RV32I-SFBILOAD-NEXT: .LBB31_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
+; RV32I-SFBILOAD-NEXT: add a1, a5, a1
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB31_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lwu a2, 16(a0)
+; RV64I-SFBILOAD-NEXT: .LBB31_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i64 4      ; compute base + 4 i32 elements (byte offset 16)
+ %val = load i32, ptr %addr ; load 32-bit value
+ %val1 = load volatile i64, ptr %base1
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
+define i64 @test_i64_1_volatile(ptr %base, i1 zeroext %x, i64 %b, ptr %base1) nounwind {
+; RV32I-LABEL: test_i64_1_volatile:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a7, 32(a0)
+; RV32I-NEXT: lw a6, 36(a0)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a0, 0(a4)
+; RV32I-NEXT: bnez a1, .LBB32_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a6, a3
+; RV32I-NEXT: mv a7, a2
+; RV32I-NEXT: .LBB32_2: # %entry
+; RV32I-NEXT: add a0, a7, a0
+; RV32I-NEXT: sltu a1, a0, a7
+; RV32I-NEXT: add a5, a6, a5
+; RV32I-NEXT: add a1, a5, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_1_volatile:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: ld a4, 32(a0)
+; RV64I-NEXT: ld a0, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB32_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a4, a2
+; RV64I-NEXT: .LBB32_2: # %entry
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_1_volatile:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a5, 32(a0)
+; RV32I-SFB-NEXT: lw a6, 36(a0)
+; RV32I-SFB-NEXT: lw a7, 4(a4)
+; RV32I-SFB-NEXT: lw a0, 0(a4)
+; RV32I-SFB-NEXT: bnez a1, .LBB32_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a6, a3
+; RV32I-SFB-NEXT: .LBB32_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB32_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a5, a2
+; RV32I-SFB-NEXT: .LBB32_4: # %entry
+; RV32I-SFB-NEXT: add a0, a5, a0
+; RV32I-SFB-NEXT: sltu a1, a0, a5
+; RV32I-SFB-NEXT: add a6, a6, a7
+; RV32I-SFB-NEXT: add a1, a6, a1
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_1_volatile:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: ld a3, 0(a3)
+; RV64I-SFB-NEXT: bnez a1, .LBB32_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB32_2: # %entry
+; RV64I-SFB-NEXT: add a0, a0, a3
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_1_volatile:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a5, 4(a4)
+; RV32I-SFBILOAD-NEXT: lw a4, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB32_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw a2, 32(a0)
+; RV32I-SFBILOAD-NEXT: .LBB32_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB32_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: lw a3, 36(a0)
+; RV32I-SFBILOAD-NEXT: .LBB32_4: # %entry
+; RV32I-SFBILOAD-NEXT: add a0, a2, a4
+; RV32I-SFBILOAD-NEXT: sltu a1, a0, a2
+; RV32I-SFBILOAD-NEXT: add a3, a3, a5
+; RV32I-SFBILOAD-NEXT: add a1, a3, a1
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_1_volatile:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: ld a3, 0(a3)
+; RV64I-SFBILOAD-NEXT: beqz a1, .LBB32_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: ld a2, 32(a0)
+; RV64I-SFBILOAD-NEXT: .LBB32_2: # %entry
+; RV64I-SFBILOAD-NEXT: add a0, a2, a3
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i64, ptr %base, i64 4      ; compute base + 4 i64 elements (byte offset 32)
+ %val = load i64, ptr %addr ; load 64-bit value
+ %val1 = load volatile i64, ptr %base1
+ %res = select i1 %x, i64 %val, i64 %b
+ %res1 = add i64 %res, %val1
+ ret i64 %res1
+}
+
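+; The tests below use monotonic atomic loads, which are lowered to
+; __atomic_load_N libcalls on these targets. The value is produced by a call
+; rather than a load instruction, so only the extension of the call result can
+; be predicated, not the load itself.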
+define i32 @test_i8_s_2(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i8_s_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s1, .LBB33_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: .LBB33_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB33_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB33_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: mv s1, a1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s1, .LBB33_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 24
+; RV32I-SFB-NEXT: .LBB33_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s1, .LBB33_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: .LBB33_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: mv s1, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB33_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB33_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB33_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB33_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z_2(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i8_z_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s1, .LBB34_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: .LBB34_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB34_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB34_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: mv s1, a1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s1, .LBB34_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: zext.b s0, a0
+; RV32I-SFB-NEXT: .LBB34_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s1, .LBB34_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: .LBB34_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: mv s1, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB34_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s0, a0
+; RV32I-SFBILOAD-NEXT: .LBB34_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB34_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: .LBB34_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_2(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i16_s_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s1, .LBB35_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: .LBB35_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB35_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB35_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: mv s1, a1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s1, .LBB35_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 16
+; RV32I-SFB-NEXT: .LBB35_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB35_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: .LBB35_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: mv s1, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB35_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB35_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB35_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB35_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4      ; compute base + 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_2(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i16_z_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s1, .LBB36_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: .LBB36_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB36_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB36_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: mv s1, a1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s1, .LBB36_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srli s0, a0, 16
+; RV32I-SFB-NEXT: .LBB36_2: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB36_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: .LBB36_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: mv s1, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s1, .LBB36_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srli s0, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB36_2: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB36_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB36_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4      ; compute base + 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_2(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i32_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: bnez s1, .LBB37_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: .LBB37_2: # %entry
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a0, a0, 16
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: bnez s1, .LBB37_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB37_2: # %entry
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a2
+; RV32I-SFB-NEXT: mv s1, a1
+; RV32I-SFB-NEXT: addi a0, a0, 16
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_4
+; RV32I-SFB-NEXT: bnez s1, .LBB37_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: .LBB37_2: # %entry
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a0, a0, 16
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_4
+; RV64I-SFB-NEXT: bnez s1, .LBB37_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB37_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a2
+; RV32I-SFBILOAD-NEXT: mv s1, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_4
+; RV32I-SFBILOAD-NEXT: bnez s1, .LBB37_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: .LBB37_2: # %entry
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_4
+; RV64I-SFBILOAD-NEXT: bnez s1, .LBB37_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: .LBB37_2: # %entry
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; compute address of element 4 (base + 16 bytes)
+ %val = load atomic i32, ptr %addr monotonic, align 4 ; load 32-bit value
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
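+; NB: the _store_2 tests below combine the selected atomic-load result with
+; an unrelated i32 store. The monotonic atomic loads are lowered to
+; __atomic_load_N libcalls here (presumably because these configurations
+; lack the 'A' extension), so the load itself cannot be predicated; the SFB
+; and SFBILOAD outputs therefore match, branching over only the
+; extend/select and sinking the store past the branch-over block.
+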
+define i32 @test_i8_s_store_2(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i8_s_store_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB38_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: .LBB38_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB38_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB38_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: mv s3, a1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s3, .LBB38_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s2, a0, 24
+; RV32I-SFB-NEXT: .LBB38_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s3, .LBB38_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: .LBB38_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: mv s3, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB38_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s2, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB38_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB38_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB38_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
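+; The extensions of the libcall result are materialized as shift pairs:
+; slli positions the narrow value at the MSB, then srai sign-extends it
+; back down (srli or zext.b for the zero-extending variants).
+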
+define i32 @test_i8_z_store_2(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i8_z_store_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB39_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: .LBB39_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB39_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB39_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: mv s3, a1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s3, .LBB39_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: zext.b s2, a0
+; RV32I-SFB-NEXT: .LBB39_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s3, .LBB39_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: .LBB39_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: mv s3, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB39_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s2, a0
+; RV32I-SFBILOAD-NEXT: .LBB39_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB39_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: .LBB39_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_store_2(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i16_s_store_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB40_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: .LBB40_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB40_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB40_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: mv s3, a1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s3, .LBB40_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s2, a0, 16
+; RV32I-SFB-NEXT: .LBB40_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB40_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: .LBB40_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: mv s3, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB40_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s2, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB40_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB40_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB40_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute address of element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_store_2(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i16_z_store_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: beqz s3, .LBB41_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: .LBB41_2: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB41_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB41_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: mv s3, a1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s3, .LBB41_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srli s2, a0, 16
+; RV32I-SFB-NEXT: .LBB41_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB41_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: .LBB41_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: mv s3, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s3, .LBB41_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srli s2, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB41_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB41_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB41_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i32 4 ; compute address of element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_store_2(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i32_store_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s1, a4
+; RV32I-NEXT: mv s2, a3
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: sw s1, 0(s2)
+; RV32I-NEXT: bnez s3, .LBB42_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: .LBB42_2: # %entry
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_store_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a0, a0, 16
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: sw s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB42_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB42_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_store_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a4
+; RV32I-SFB-NEXT: mv s1, a3
+; RV32I-SFB-NEXT: mv s2, a2
+; RV32I-SFB-NEXT: mv s3, a1
+; RV32I-SFB-NEXT: addi a0, a0, 16
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_4
+; RV32I-SFB-NEXT: bnez s3, .LBB42_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: .LBB42_2: # %entry
+; RV32I-SFB-NEXT: sw s0, 0(s1)
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_store_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a0, a0, 16
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_4
+; RV64I-SFB-NEXT: bnez s3, .LBB42_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB42_2: # %entry
+; RV64I-SFB-NEXT: sw s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_store_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a4
+; RV32I-SFBILOAD-NEXT: mv s1, a3
+; RV32I-SFBILOAD-NEXT: mv s2, a2
+; RV32I-SFBILOAD-NEXT: mv s3, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_4
+; RV32I-SFBILOAD-NEXT: bnez s3, .LBB42_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: .LBB42_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_store_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_4
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB42_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB42_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i32 4 ; compute address of element 4 (base + 16 bytes)
+ %val = load atomic i32, ptr %addr monotonic, align 4 ; load 32-bit value
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
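+; For the i64-returning tests below, RV32 must update both result halves
+; conditionally, so SFB expansion emits two branch-over sequences guarded
+; by the same condition (e.g. .LBB43_2 and .LBB43_4).
+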
+define i64 @test_i8_s_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i8_s_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s2, .LBB43_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 24
+; RV32I-NEXT: .LBB43_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB43_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB43_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s2, .LBB43_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s1, a0, 24
+; RV32I-SFB-NEXT: .LBB43_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB43_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: .LBB43_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s1, .LBB43_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: .LBB43_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB43_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s1, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB43_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB43_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB43_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB43_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB43_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_z_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i8_z_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: beqz s2, .LBB44_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: .LBB44_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: beqz s1, .LBB44_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB44_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s2, .LBB44_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB44_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB44_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: zext.b s1, a0
+; RV32I-SFB-NEXT: .LBB44_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s1, .LBB44_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: .LBB44_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB44_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB44_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB44_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s1, a0
+; RV32I-SFBILOAD-NEXT: .LBB44_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB44_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: .LBB44_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i16_s_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s2, .LBB45_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 16
+; RV32I-NEXT: .LBB45_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB45_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB45_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s2, .LBB45_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s1, a0, 16
+; RV32I-SFB-NEXT: .LBB45_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB45_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: .LBB45_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB45_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: .LBB45_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB45_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s1, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB45_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB45_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB45_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB45_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB45_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; compute address of element 4 (base + 8 bytes)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i16_z_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: beqz s2, .LBB46_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: .LBB46_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: beqz s1, .LBB46_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB46_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s2, .LBB46_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB46_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB46_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srli s1, a0, 16
+; RV32I-SFB-NEXT: .LBB46_4: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s1, .LBB46_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: .LBB46_2: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB46_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB46_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB46_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srli s1, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB46_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB46_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB46_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; address of element 4 (byte offset 8)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
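
Note on the plain RV32I output above: the high word of the zero-extended result is selected without a branch. Since %x arrives as a zero-extended i1, `addi a1, s2, -1` yields 0 when the condition is 1 and all-ones when it is 0, so the following `and` picks between 0 (the high word of the zext) and the high word of %b. A minimal sketch of the idiom, with illustrative register names not taken from the test:

    addi t0, cond, -1   # t0 = 0 if cond == 1, 0xffffffff if cond == 0
    and  a1, t0, b_hi   # a1 = 0 when the select is taken, b_hi otherwise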
+
+define i64 @test_i32_z_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i32_z_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: beqz s2, .LBB47_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lw s1, 12(sp)
+; RV32I-NEXT: .LBB47_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 0
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: beqz s1, .LBB47_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lwu s0, 4(sp)
+; RV64I-NEXT: .LBB47_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: addi a2, sp, 12
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 12(sp)
+; RV32I-SFB-NEXT: bnez s2, .LBB47_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB47_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB47_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB47_4: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 0
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s1, .LBB47_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB47_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB47_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
+; RV32I-SFBILOAD-NEXT: .LBB47_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB47_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB47_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB47_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
+; RV64I-SFBILOAD-NEXT: .LBB47_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i32, ptr %base, i64 4 ; address of element 4 (byte offset 16)
+ %val = load atomic i32, ptr %addr monotonic, align 2 ; under-aligned 32-bit load
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
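
Note that test_i32_z_1_2 loads with align 2, below the natural alignment of i32, so the atomic load lowers to the generic __atomic_load libcall with a stack temporary rather than to __atomic_load_4. The SFBILOAD variants then fold the reload from that stack slot (`lw`/`lwu`) into the conditional block, while the plain SFB variants reload unconditionally and only guard the moves. For contrast, a naturally aligned variant would hit the sized libcall; a hypothetical sketch, not part of this patch:

    %val = load atomic i32, ptr %addr monotonic, align 4
    ; lowers to a call to __atomic_load_4 when atomic loads go to libcalls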
+
+define i64 @test_i64_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i64_1_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: bnez s2, .LBB48_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB48_2: # %entry
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_1_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a0, a0, 32
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: bnez s1, .LBB48_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB48_2: # %entry
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_1_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -16
+; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: bnez s2, .LBB48_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB48_2: # %entry
+; RV32I-SFB-NEXT: bnez s2, .LBB48_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: .LBB48_4: # %entry
+; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_1_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a0, a0, 32
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_8
+; RV64I-SFB-NEXT: bnez s1, .LBB48_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB48_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_1_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
+; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB48_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: .LBB48_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s2, .LBB48_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: .LBB48_4: # %entry
+; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_1_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_8
+; RV64I-SFBILOAD-NEXT: bnez s1, .LBB48_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: .LBB48_2: # %entry
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i64, ptr %base, i64 4 ; address of element 4 (byte offset 32)
+ %val = load atomic i64, ptr %addr monotonic, align 8 ; load 64-bit value
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
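
The i64 test makes the shape of the expansion easy to see: each conditional update is a branch over exactly one instruction, so the guarded op sits in the shadow of a short forward branch. Roughly, as a generic sketch rather than the output of any one test:

    bnez cond, 1f      # short forward branch over a single instruction
    mv   dst, src      # executed only on the fall-through path
    1:

On RV32 the i64 select needs two such blocks, one per register half, both keyed on the same condition (s2 above); RV64 needs only one.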
+
+define i64 @test_i8_s_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i8_s_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s5, a1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB49_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 24
+; RV32I-NEXT: .LBB49_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB49_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: .LBB49_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: mv s5, a1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: slli a0, a0, 24
+; RV32I-SFB-NEXT: beqz s5, .LBB49_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s4, a0, 24
+; RV32I-SFB-NEXT: .LBB49_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB49_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: .LBB49_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: slli a0, a0, 56
+; RV64I-SFB-NEXT: beqz s3, .LBB49_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: .LBB49_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: mv s5, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB49_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s4, a0, 24
+; RV32I-SFBILOAD-NEXT: .LBB49_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB49_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB49_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB49_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: .LBB49_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; address of element 4 (byte offset 4)
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
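
The *_store_64_2 variants interpose an unrelated `store i64 %c, ptr %base1` between the atomic load and the select. In the SFB outputs the store is scheduled below the conditional blocks (the `sw s1, 0(s2)` / `sw s0, 4(s2)` pair after .LBB49_4 above), which appears to check that an independent store neither blocks the transform nor ends up in a conditionally executed slot.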
+
+define i64 @test_i8_z_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i8_z_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s3, a6
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: mv s5, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a0, a0, 4
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: sw s4, 0(s5)
+; RV32I-NEXT: sw s3, 4(s5)
+; RV32I-NEXT: beqz s2, .LBB50_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: .LBB50_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a0, a0, 4
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB50_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: .LBB50_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: mv s5, a1
+; RV32I-SFB-NEXT: addi a0, a0, 4
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_1
+; RV32I-SFB-NEXT: beqz s5, .LBB50_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB50_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB50_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: zext.b s4, a0
+; RV32I-SFB-NEXT: .LBB50_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a0, a0, 4
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_1
+; RV64I-SFB-NEXT: beqz s3, .LBB50_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: .LBB50_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: mv s5, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_1
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB50_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB50_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB50_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: zext.b s4, a0
+; RV32I-SFBILOAD-NEXT: .LBB50_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_1
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB50_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: .LBB50_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; address of element 4 (byte offset 4)
+ %val = load atomic i8, ptr %addr monotonic, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i16_s_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s5, a1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: beqz s5, .LBB51_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: srai s1, a0, 16
+; RV32I-NEXT: .LBB51_2: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB51_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: .LBB51_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: mv s5, a1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s5, .LBB51_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: srai s4, a0, 16
+; RV32I-SFB-NEXT: .LBB51_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB51_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: .LBB51_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB51_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: .LBB51_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: mv s5, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB51_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: srai s4, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB51_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB51_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB51_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB51_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB51_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; address of element 4 (byte offset 8)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i16_z_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s3, a6
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: mv s5, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a0, a0, 8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: sw s4, 0(s5)
+; RV32I-NEXT: sw s3, 4(s5)
+; RV32I-NEXT: beqz s2, .LBB52_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: .LBB52_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a0, a0, 8
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: beqz s3, .LBB52_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: .LBB52_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: mv s5, a1
+; RV32I-SFB-NEXT: addi a0, a0, 8
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_2
+; RV32I-SFB-NEXT: slli a0, a0, 16
+; RV32I-SFB-NEXT: beqz s5, .LBB52_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB52_2: # %entry
+; RV32I-SFB-NEXT: beqz s5, .LBB52_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srli s4, a0, 16
+; RV32I-SFB-NEXT: .LBB52_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a0, a0, 8
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_2
+; RV64I-SFB-NEXT: slli a0, a0, 48
+; RV64I-SFB-NEXT: beqz s3, .LBB52_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: .LBB52_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: mv s5, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_2
+; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB52_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB52_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB52_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srli s4, a0, 16
+; RV32I-SFBILOAD-NEXT: .LBB52_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_2
+; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
+; RV64I-SFBILOAD-NEXT: beqz s3, .LBB52_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: .LBB52_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i16, ptr %base, i64 4 ; address of element 4 (byte offset 8)
+ %val = load atomic i16, ptr %addr monotonic, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i32_z_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i32_z_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s3, a6
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: mv s5, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: mv a2, sp
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: lw a0, 0(sp)
+; RV32I-NEXT: sw s4, 0(s5)
+; RV32I-NEXT: sw s3, 4(s5)
+; RV32I-NEXT: bnez s1, .LBB53_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: .LBB53_2: # %entry
+; RV32I-NEXT: addi a1, s1, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 0
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: lwu a0, 4(sp)
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB53_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB53_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: mv s5, a1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: mv a2, sp
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 0(sp)
+; RV32I-SFB-NEXT: beqz s5, .LBB53_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: .LBB53_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB53_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB53_4: # %entry
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 0
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s3, .LBB53_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB53_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: mv s5, a1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: mv a2, sp
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
+; RV32I-SFBILOAD-NEXT: beqz s5, .LBB53_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: .LBB53_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB53_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB53_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB53_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB53_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i64 4 ; address of element 4 (base + 16 bytes)
+ %val = load atomic i32, ptr %addr monotonic, align 2 ; load 32-bit value
+ %ext = zext i32 %val to i64 ; zero-extend to 64 bits
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i64_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
+; RV32I-LABEL: test_i64_store_64_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s2, a6
+; RV32I-NEXT: mv s3, a5
+; RV32I-NEXT: mv s4, a4
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s5, a1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: sw s3, 0(s4)
+; RV32I-NEXT: sw s2, 4(s4)
+; RV32I-NEXT: bnez s5, .LBB54_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: .LBB54_2: # %entry
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i64_store_64_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s1, a4
+; RV64I-NEXT: mv s2, a3
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s3, a1
+; RV64I-NEXT: addi a0, a0, 32
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: sd s1, 0(s2)
+; RV64I-NEXT: bnez s3, .LBB54_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: .LBB54_2: # %entry
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i64_store_64_2:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a6
+; RV32I-SFB-NEXT: mv s1, a5
+; RV32I-SFB-NEXT: mv s2, a4
+; RV32I-SFB-NEXT: mv s3, a3
+; RV32I-SFB-NEXT: mv s4, a2
+; RV32I-SFB-NEXT: mv s5, a1
+; RV32I-SFB-NEXT: addi a0, a0, 32
+; RV32I-SFB-NEXT: li a1, 0
+; RV32I-SFB-NEXT: call __atomic_load_8
+; RV32I-SFB-NEXT: sw s1, 0(s2)
+; RV32I-SFB-NEXT: bnez s5, .LBB54_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: .LBB54_2: # %entry
+; RV32I-SFB-NEXT: bnez s5, .LBB54_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a1, s3
+; RV32I-SFB-NEXT: .LBB54_4: # %entry
+; RV32I-SFB-NEXT: sw s0, 4(s2)
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i64_store_64_2:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -48
+; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a4
+; RV64I-SFB-NEXT: mv s1, a3
+; RV64I-SFB-NEXT: mv s2, a2
+; RV64I-SFB-NEXT: mv s3, a1
+; RV64I-SFB-NEXT: addi a0, a0, 32
+; RV64I-SFB-NEXT: li a1, 0
+; RV64I-SFB-NEXT: call __atomic_load_8
+; RV64I-SFB-NEXT: bnez s3, .LBB54_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: .LBB54_2: # %entry
+; RV64I-SFB-NEXT: sd s0, 0(s1)
+; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i64_store_64_2:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a6
+; RV32I-SFBILOAD-NEXT: mv s1, a5
+; RV32I-SFBILOAD-NEXT: mv s2, a4
+; RV32I-SFBILOAD-NEXT: mv s3, a3
+; RV32I-SFBILOAD-NEXT: mv s4, a2
+; RV32I-SFBILOAD-NEXT: mv s5, a1
+; RV32I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV32I-SFBILOAD-NEXT: li a1, 0
+; RV32I-SFBILOAD-NEXT: call __atomic_load_8
+; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB54_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: .LBB54_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez s5, .LBB54_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a1, s3
+; RV32I-SFBILOAD-NEXT: .LBB54_4: # %entry
+; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i64_store_64_2:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
+; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a4
+; RV64I-SFBILOAD-NEXT: mv s1, a3
+; RV64I-SFBILOAD-NEXT: mv s2, a2
+; RV64I-SFBILOAD-NEXT: mv s3, a1
+; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
+; RV64I-SFBILOAD-NEXT: li a1, 0
+; RV64I-SFBILOAD-NEXT: call __atomic_load_8
+; RV64I-SFBILOAD-NEXT: bnez s3, .LBB54_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: .LBB54_2: # %entry
+; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
+; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i64, ptr %base, i64 4 ; address of element 4 (base + 32 bytes)
+ %val = load atomic i64, ptr %addr monotonic, align 8 ; load 64-bit value
+ store i64 %c, ptr %base1
+ %res = select i1 %x, i64 %val, i64 %b
+ ret i64 %res
+}
+
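Aside for readers skimming the generated checks above: every one of these tests exercises a select whose true operand is a loaded value. A C++ analogy of what the SFBILOAD lowering does with that pattern is sketched below; it is purely illustrative and not code from the PR, and the function name is invented for the sketch.

// Illustrative analogy only (not from the PR): a load feeding a select.
// With short-forward-branch load support the backend emits the load
// unconditionally and then branches over a single move, e.g.
//   ld   a0, 32(a0)
//   bnez a1, .skip
//   mv   a0, a2
// .skip:
#include <cstdint>

int64_t select_of_load(const int64_t *base, bool x, int64_t b) {
  int64_t val = base[4]; // the unconditional load (byte offset 32)
  return x ? val : b;    // lowered to the branch-over-move shown above
}

The load is already unconditional in the IR (it executes before the select), so hoisting it above the branch introduces no speculation.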
>From 52e47be331dd7b0dfb506a81098f4ad665cc02e4 Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Mon, 8 Dec 2025 11:25:29 +0530
Subject: [PATCH 09/11] fixup! Address comments
Change-Id: I19e55ada194ba2616c77eb82174ca059cbff29d2
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 11688476a2554..27b9f45877c40 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -926,24 +926,19 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
if (MI.getOpcode() != RISCV::PseudoCCMOVGPR)
return nullptr;
- if (!STI.hasShortForwardBranchILoad() ||
- (LoadMI.getOpcode() != RISCV::LB && LoadMI.getOpcode() != RISCV::LBU &&
- LoadMI.getOpcode() != RISCV::LH && LoadMI.getOpcode() != RISCV::LHU &&
- LoadMI.getOpcode() != RISCV::LW && LoadMI.getOpcode() != RISCV::LWU &&
- LoadMI.getOpcode() != RISCV::LD))
+ unsigned PredOpc = getLoadPredicatedOpcode(LoadMI.getOpcode());
+
+ if (!STI.hasShortForwardBranchILoad() || !PredOpc)
return nullptr;
MachineRegisterInfo &MRI = MF.getRegInfo();
bool Invert = MRI.getVRegDef(MI.getOperand(4).getReg()) == &LoadMI;
- MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
+ const MachineOperand &FalseReg = MI.getOperand(Invert ? 5 : 4);
Register DestReg = MI.getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
if (!MRI.constrainRegClass(DestReg, PreviousClass))
return nullptr;
- unsigned PredOpc = getLoadPredicatedOpcode(LoadMI.getOpcode());
- assert(PredOpc != 0 && "Unexpected opcode!");
-
// Create a new predicated version of DefMI.
MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), InsertPt,
MI.getDebugLoc(), get(PredOpc), DestReg);
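A note on the refactor in the hunk above: it replaces the open-coded chain of opcode comparisons with a single getLoadPredicatedOpcode lookup and drops the now-redundant assert. A minimal sketch of the shape such a helper could take follows. This is illustrative only, assuming LLVM's RISCV backend headers; the helper's actual body is not shown in this excerpt, and the PseudoCC* names are assumptions for the sketch.

// Sketch only: one plausible shape for getLoadPredicatedOpcode, which the
// hunk above starts calling. The PseudoCC* names are assumed for
// illustration, not taken from the PR.
static unsigned getLoadPredicatedOpcode(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::LB:  return RISCV::PseudoCCLB;
  case RISCV::LBU: return RISCV::PseudoCCLBU;
  case RISCV::LH:  return RISCV::PseudoCCLH;
  case RISCV::LHU: return RISCV::PseudoCCLHU;
  case RISCV::LW:  return RISCV::PseudoCCLW;
  case RISCV::LWU: return RISCV::PseudoCCLWU;
  case RISCV::LD:  return RISCV::PseudoCCLD;
  default:         return 0; // Not a load this optimization handles.
  }
}

Returning 0 for unhandled opcodes is what lets the caller bail out with the simple !PredOpc check seen in the hunk.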
>From 65403e2d08bc9decc3b13ed1220c3352163f8ddd Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Mon, 8 Dec 2025 12:55:56 +0530
Subject: [PATCH 10/11] fixup! Address comments
Change-Id: I3098b45d7ca8b2cb164b56ecd92a46aca3dccf74
---
llvm/lib/Target/RISCV/RISCVFeatures.td | 2 -
...-branch-opt-load-atomic-acquire-seq_cst.ll | 782 +---
.../RISCV/short-forward-branch-opt-load.ll | 3405 +++--------------
3 files changed, 760 insertions(+), 3429 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index dd7f36136ea4e..8425a9a231e97 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1928,8 +1928,6 @@ def TuneShortForwardBranchIMul
[TuneShortForwardBranchIALU]>;
def HasShortForwardBranchIMul : Predicate<"Subtarget->hasShortForwardBranchIMul()">;
-
-
def TuneShortForwardBranchILoad
: SubtargetFeature<"short-forward-branch-iload", "HasShortForwardBranchILoad",
"true", "Enable short forward branch optimization for load instructions",
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire-seq_cst.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire-seq_cst.ll
index d4e418ebb8fd3..d8217fa397a3c 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire-seq_cst.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire-seq_cst.ll
@@ -1052,171 +1052,78 @@ entry:
define i64 @test_i32_z_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i32_z_1_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: addi a2, sp, 12
-; RV32I-NEXT: li a3, 2
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: beqz s2, .LBB14_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lw s1, 12(sp)
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB14_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB14_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_1_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 2
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: beqz s1, .LBB14_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: lwu s0, 4(sp)
+; RV64I-NEXT: lwu a0, 16(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB14_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB14_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_1_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: mv s2, a1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: addi a2, sp, 12
-; RV32I-SFB-NEXT: li a3, 2
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 12(sp)
-; RV32I-SFB-NEXT: bnez s2, .LBB14_2
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB14_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB14_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB14_4
+; RV32I-SFB-NEXT: bnez a1, .LBB14_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB14_4: # %entry
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_1_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 2
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s1, .LBB14_2
+; RV64I-SFB-NEXT: lwu a0, 16(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB14_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB14_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_1_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: mv s2, a1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
-; RV32I-SFBILOAD-NEXT: li a3, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_2
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB14_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB14_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_1_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB14_2
+; RV64I-SFBILOAD-NEXT: lwu a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB14_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i32, ptr %addr acquire, align 2 ; load 32-bit value
+ %val = load atomic i32, ptr %addr acquire, align 4 ; load 32-bit value
%ext = zext i32 %val to i64 ; zero-extend to 64 bits
%res = select i1 %x, i64 %ext, i64 %b
ret i64 %res
@@ -1709,225 +1616,87 @@ entry:
define i64 @test_i32_z_store_64_3(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i32_z_store_64_3:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s3, a6
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s2, a2
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: mv a2, sp
-; RV32I-NEXT: li a3, 2
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: lw a0, 0(sp)
-; RV32I-NEXT: sw s4, 0(s5)
-; RV32I-NEXT: sw s3, 4(s5)
-; RV32I-NEXT: bnez s1, .LBB20_2
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB20_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB20_2: # %entry
-; RV32I-NEXT: addi a1, s1, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_store_64_3:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 2
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: lwu a0, 4(sp)
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB20_2
+; RV64I-NEXT: lwu a0, 16(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB20_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB20_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_store_64_3:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: mv s5, a1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: mv a2, sp
-; RV32I-SFB-NEXT: li a3, 2
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 0(sp)
-; RV32I-SFB-NEXT: beqz s5, .LBB20_2
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB20_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB20_2: # %entry
-; RV32I-SFB-NEXT: bnez s5, .LBB20_4
+; RV32I-SFB-NEXT: bnez a1, .LBB20_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB20_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_store_64_3:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 2
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s3, .LBB20_2
+; RV64I-SFB-NEXT: lwu a0, 16(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB20_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB20_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_3:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: mv s5, a1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: mv a2, sp
-; RV32I-SFBILOAD-NEXT: li a3, 2
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB20_2
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB20_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB20_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB20_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB20_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB20_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_3:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 2
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB20_2
+; RV64I-SFBILOAD-NEXT: lwu a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB20_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB20_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i32, ptr %addr acquire, align 2 ; load 32-bit value
+ %val = load atomic i32, ptr %addr acquire, align 4 ; load 32-bit value
%ext = zext i32 %val to i64 ; zero-extend to 64 bits
store i64 %c, ptr %base1
%res = select i1 %x, i64 %ext, i64 %b
@@ -3215,171 +2984,84 @@ entry:
define i64 @test_i32_z_1_4(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i32_z_1_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: addi a2, sp, 12
-; RV32I-NEXT: li a3, 5
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: beqz s2, .LBB36_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lw s1, 12(sp)
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB36_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB36_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_1_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 5
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: beqz s1, .LBB36_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: lwu s0, 4(sp)
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lwu a0, 16(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB36_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB36_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_1_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: mv s2, a1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: addi a2, sp, 12
-; RV32I-SFB-NEXT: li a3, 5
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 12(sp)
-; RV32I-SFB-NEXT: bnez s2, .LBB36_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB36_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB36_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB36_4
+; RV32I-SFB-NEXT: beqz a1, .LBB36_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB36_4: # %entry
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_1_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 5
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s1, .LBB36_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lwu a0, 16(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB36_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB36_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: fence r, rw
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_1_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: mv s2, a1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
-; RV32I-SFBILOAD-NEXT: li a3, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB36_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB36_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB36_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB36_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB36_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB36_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_1_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB36_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lwu a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB36_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB36_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: fence r, rw
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i32, ptr %addr seq_cst, align 2 ; load 32-bit value
+ %val = load atomic i32, ptr %addr seq_cst, align 4 ; load 32-bit value
%ext = zext i32 %val to i64 ; zero-extend to 64 bits
%res = select i1 %x, i64 %ext, i64 %b
ret i64 %res
@@ -3899,225 +3581,93 @@ entry:
define i64 @test_i32_z_store_64_4(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i32_z_store_64_4:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s3, a6
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s2, a2
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: mv a2, sp
-; RV32I-NEXT: li a3, 5
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: lw a0, 0(sp)
-; RV32I-NEXT: sw s4, 0(s5)
-; RV32I-NEXT: sw s3, 4(s5)
-; RV32I-NEXT: bnez s1, .LBB42_2
+; RV32I-NEXT: fence rw, rw
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB42_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB42_2: # %entry
-; RV32I-NEXT: addi a1, s1, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_store_64_4:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 5
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: lwu a0, 4(sp)
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB42_2
+; RV64I-NEXT: fence rw, rw
+; RV64I-NEXT: lwu a0, 16(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB42_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB42_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_store_64_4:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: mv s5, a1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: mv a2, sp
-; RV32I-SFB-NEXT: li a3, 5
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 0(sp)
-; RV32I-SFB-NEXT: beqz s5, .LBB42_2
+; RV32I-SFB-NEXT: fence rw, rw
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: beqz a1, .LBB42_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB42_2: # %entry
-; RV32I-SFB-NEXT: bnez s5, .LBB42_4
+; RV32I-SFB-NEXT: bnez a1, .LBB42_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB42_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_store_64_4:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 5
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s3, .LBB42_2
+; RV64I-SFB-NEXT: fence rw, rw
+; RV64I-SFB-NEXT: lwu a0, 16(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB42_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB42_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_4:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: mv s5, a1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: mv a2, sp
-; RV32I-SFBILOAD-NEXT: li a3, 5
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB42_2
+; RV32I-SFBILOAD-NEXT: fence rw, rw
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB42_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB42_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB42_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB42_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB42_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_4:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 5
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB42_2
+; RV64I-SFBILOAD-NEXT: fence rw, rw
+; RV64I-SFBILOAD-NEXT: lwu a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB42_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB42_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i32, ptr %addr seq_cst, align 2 ; load 32-bit value
+ %val = load atomic i32, ptr %addr seq_cst, align 4 ; load 32-bit value
%ext = zext i32 %val to i64 ; zero-extend to 64 bits
store i64 %c, ptr %base1
%res = select i1 %x, i64 %ext, i64 %b
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
index faf4dd0c57c7f..c64f5318fa3a5 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-ialu | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-ialu | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-ialu | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFB
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+short-forward-branch-iload | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
-; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+short-forward-branch-iload | \
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload | \
; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
define i32 @test_i8_s(ptr %base, i1 zeroext %x, i32 %b) nounwind {
@@ -2574,140 +2574,56 @@ entry:
define i32 @test_i8_s_2(ptr %base, i1 zeroext %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_s_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s1, .LBB33_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: bnez a1, .LBB33_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB33_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB33_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: bnez a1, .LBB33_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB33_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: mv s1, a1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s1, .LBB33_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB33_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 24
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB33_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s1, .LBB33_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB33_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB33_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: mv s1, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB33_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB33_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB33_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB33_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB33_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB33_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -2720,134 +2636,56 @@ entry:
define i32 @test_i8_z_2(ptr %base, i1 zeroext %x, i32 %b) nounwind {
; RV32I-LABEL: test_i8_z_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s1, .LBB34_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: bnez a1, .LBB34_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB34_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB34_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: bnez a1, .LBB34_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB34_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: mv s1, a1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s1, .LBB34_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB34_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: zext.b s0, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB34_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s1, .LBB34_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB34_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB34_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: mv s1, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB34_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB34_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s0, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB34_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB34_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB34_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB34_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -2860,140 +2698,56 @@ entry:
define i32 @test_i16_s_2(ptr %base, i1 zeroext %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_s_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s1, .LBB35_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: bnez a1, .LBB35_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB35_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB35_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: bnez a1, .LBB35_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB35_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: mv s1, a1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s1, .LBB35_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB35_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB35_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB35_2
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB35_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB35_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: mv s1, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB35_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB35_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB35_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB35_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB35_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB35_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
 %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8 bytes (i16 element 4)
@@ -3006,140 +2760,56 @@ entry:
define i32 @test_i16_z_2(ptr %base, i1 zeroext %x, i32 %b) nounwind {
; RV32I-LABEL: test_i16_z_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s1, .LBB36_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: bnez a1, .LBB36_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB36_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB36_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: bnez a1, .LBB36_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB36_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: mv s1, a1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s1, .LBB36_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB36_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srli s0, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB36_2: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB36_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB36_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB36_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: mv s1, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s1, .LBB36_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB36_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srli s0, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB36_2: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB36_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB36_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB36_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
 %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8 bytes (i16 element 4)
@@ -3152,128 +2822,56 @@ entry:
define i32 @test_i32_2(ptr %base, i1 zeroext %x, i32 %b) nounwind {
; RV32I-LABEL: test_i32_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: addi a0, a0, 16
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_4
-; RV32I-NEXT: bnez s1, .LBB37_2
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: bnez a1, .LBB37_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB37_2: # %entry
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a0, a0, 16
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_4
-; RV64I-NEXT: bnez s1, .LBB37_2
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: bnez a1, .LBB37_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB37_2: # %entry
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a2
-; RV32I-SFB-NEXT: mv s1, a1
-; RV32I-SFB-NEXT: addi a0, a0, 16
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_4
-; RV32I-SFB-NEXT: bnez s1, .LBB37_2
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB37_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB37_2: # %entry
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a0, a0, 16
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_4
-; RV64I-SFB-NEXT: bnez s1, .LBB37_2
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB37_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB37_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a2
-; RV32I-SFBILOAD-NEXT: mv s1, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_4
-; RV32I-SFBILOAD-NEXT: bnez s1, .LBB37_2
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB37_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB37_2: # %entry
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_4
-; RV64I-SFBILOAD-NEXT: bnez s1, .LBB37_2
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB37_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB37_2: # %entry
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
 %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 16 bytes (i32 element 4)
@@ -3285,182 +2883,62 @@ entry:
define i32 @test_i8_s_store_2(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_s_store_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB38_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 24
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB38_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB38_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_store_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB38_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB38_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB38_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_store_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: mv s3, a1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s3, .LBB38_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB38_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s2, a0, 24
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB38_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_store_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s3, .LBB38_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB38_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB38_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_store_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: mv s3, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB38_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB38_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s2, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB38_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_store_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB38_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB38_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB38_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -3474,176 +2952,62 @@ entry:
define i32 @test_i8_z_store_2(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i8_z_store_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB39_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s0, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB39_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB39_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_store_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB39_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB39_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB39_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_store_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: mv s3, a1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s3, .LBB39_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB39_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: zext.b s2, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB39_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_store_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s3, .LBB39_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB39_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB39_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_store_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: mv s3, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB39_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB39_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s2, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB39_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_store_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB39_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB39_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB39_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
@@ -3657,182 +3021,62 @@ entry:
define i32 @test_i16_s_store_2(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_s_store_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB40_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 16
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB40_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB40_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_store_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB40_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB40_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB40_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_store_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: mv s3, a1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s3, .LBB40_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB40_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s2, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB40_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_store_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB40_2
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB40_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB40_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_store_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: mv s3, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB40_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB40_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s2, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB40_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_store_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB40_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB40_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB40_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
 %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 8 bytes (i16 element 4)
@@ -3846,182 +3090,62 @@ entry:
define i32 @test_i16_z_store_2(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i16_z_store_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: beqz s3, .LBB41_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s0, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB41_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB41_2: # %entry
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_store_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB41_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB41_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB41_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_store_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: mv s3, a1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s3, .LBB41_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB41_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srli s2, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB41_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: mv a0, s2
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_store_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB41_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB41_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB41_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_store_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: mv s3, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s3, .LBB41_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB41_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srli s2, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB41_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: mv a0, s2
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_store_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB41_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB41_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB41_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4
@@ -4035,170 +3159,62 @@ entry:
define i32 @test_i32_store_2(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
; RV32I-LABEL: test_i32_store_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a4
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: addi a0, a0, 16
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_4
-; RV32I-NEXT: sw s1, 0(s2)
-; RV32I-NEXT: bnez s3, .LBB42_2
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB42_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB42_2: # %entry
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_store_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a0, a0, 16
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_4
-; RV64I-NEXT: sw s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB42_2
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB42_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB42_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_store_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a4
-; RV32I-SFB-NEXT: mv s1, a3
-; RV32I-SFB-NEXT: mv s2, a2
-; RV32I-SFB-NEXT: mv s3, a1
-; RV32I-SFB-NEXT: addi a0, a0, 16
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_4
-; RV32I-SFB-NEXT: bnez s3, .LBB42_2
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB42_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s2
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB42_2: # %entry
-; RV32I-SFB-NEXT: sw s0, 0(s1)
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a4, 0(a3)
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_store_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a0, a0, 16
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_4
-; RV64I-SFB-NEXT: bnez s3, .LBB42_2
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB42_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB42_2: # %entry
-; RV64I-SFB-NEXT: sw s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sw a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_store_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a4
-; RV32I-SFBILOAD-NEXT: mv s1, a3
-; RV32I-SFBILOAD-NEXT: mv s2, a2
-; RV32I-SFBILOAD-NEXT: mv s3, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_4
-; RV32I-SFBILOAD-NEXT: bnez s3, .LBB42_2
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB42_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s2
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB42_2: # %entry
-; RV32I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_store_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 16
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_4
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB42_2
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB42_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB42_2: # %entry
-; RV64I-SFBILOAD-NEXT: sw s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4
@@ -4211,161 +3227,71 @@ entry:
define i64 @test_i8_s_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_s_1_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s2, .LBB43_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 24
-; RV32I-NEXT: .LBB43_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: bnez a1, .LBB43_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB43_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB43_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: bnez a1, .LBB43_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB43_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_1_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: mv s2, a1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s2, .LBB43_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB43_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s1, a0, 24
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB43_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB43_4
+; RV32I-SFB-NEXT: beqz a1, .LBB43_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB43_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s1, .LBB43_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB43_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB43_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: mv s2, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB43_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB43_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s1, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB43_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB43_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB43_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB43_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB43_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB43_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB43_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -4378,155 +3304,68 @@ entry:
define i64 @test_i8_z_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i8_z_1_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: beqz s2, .LBB44_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: bnez a1, .LBB44_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB44_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: beqz s1, .LBB44_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: bnez a1, .LBB44_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB44_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_1_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: mv s2, a1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s2, .LBB44_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB44_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB44_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB44_4
+; RV32I-SFB-NEXT: bnez a1, .LBB44_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: zext.b s1, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB44_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s1, .LBB44_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB44_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s0, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB44_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: mv s2, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB44_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB44_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB44_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB44_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB44_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s1, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB44_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB44_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB44_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s0, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB44_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -4539,161 +3378,71 @@ entry:
define i64 @test_i16_s_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_s_1_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s2, .LBB45_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 16
-; RV32I-NEXT: .LBB45_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: bnez a1, .LBB45_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB45_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB45_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: bnez a1, .LBB45_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB45_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_1_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: mv s2, a1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s2, .LBB45_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB45_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s1, a0, 16
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB45_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB45_4
+; RV32I-SFB-NEXT: beqz a1, .LBB45_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s0, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB45_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB45_2
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB45_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB45_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: mv s2, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB45_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB45_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s1, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB45_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB45_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB45_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s0, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB45_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB45_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB45_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB45_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
@@ -4706,161 +3455,68 @@ entry:
define i64 @test_i16_z_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i16_z_1_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: beqz s2, .LBB46_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: bnez a1, .LBB46_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB46_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: beqz s1, .LBB46_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: bnez a1, .LBB46_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB46_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_1_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -16
-; RV32I-SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: mv s2, a1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s2, .LBB46_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB46_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB46_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB46_4
+; RV32I-SFB-NEXT: bnez a1, .LBB46_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srli s1, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB46_4: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 16
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s1, .LBB46_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB46_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s0, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB46_2: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -16
-; RV32I-SFBILOAD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: mv s2, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB46_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB46_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB46_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB46_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB46_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srli s1, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB46_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 16
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB46_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB46_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s0, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB46_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
@@ -4873,171 +3529,72 @@ entry:
define i64 @test_i32_z_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
; RV32I-LABEL: test_i32_z_1_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: addi a2, sp, 12
-; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: beqz s2, .LBB47_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: lw s1, 12(sp)
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: bnez a1, .LBB47_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB47_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 0
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: beqz s1, .LBB47_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: lwu s0, 4(sp)
+; RV64I-NEXT: lwu a0, 16(a0)
+; RV64I-NEXT: bnez a1, .LBB47_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB47_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_1_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a3
-; RV32I-SFB-NEXT: mv s1, a2
-; RV32I-SFB-NEXT: mv s2, a1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: addi a2, sp, 12
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 12(sp)
-; RV32I-SFB-NEXT: bnez s2, .LBB47_2
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB47_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB47_2: # %entry
-; RV32I-SFB-NEXT: beqz s2, .LBB47_4
+; RV32I-SFB-NEXT: bnez a1, .LBB47_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB47_4: # %entry
-; RV32I-SFB-NEXT: mv a1, s0
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 0
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s1, .LBB47_2
+; RV64I-SFB-NEXT: lwu a0, 16(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB47_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB47_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_1_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a3
-; RV32I-SFBILOAD-NEXT: mv s1, a2
-; RV32I-SFBILOAD-NEXT: mv s2, a1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB47_2
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB47_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB47_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s2, .LBB47_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB47_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB47_4: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s1
-; RV32I-SFBILOAD-NEXT: mv a1, s0
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: beqz s1, .LBB47_2
+; RV64I-SFBILOAD-NEXT: lwu a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB47_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB47_2: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i32, ptr %addr monotonic, align 2 ; load 32-bit value
+ %val = load atomic i32, ptr %addr monotonic, align 4 ; load 32-bit value
%ext = zext i32 %val to i64 ; zero-extend to 64 bits
%res = select i1 %x, i64 %ext, i64 %b
ret i64 %res
@@ -5071,23 +3628,11 @@ define i64 @test_i64_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
;
; RV64I-LABEL: test_i64_1_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s1, a1
-; RV64I-NEXT: addi a0, a0, 32
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_8
-; RV64I-NEXT: bnez s1, .LBB48_2
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: bnez a1, .LBB48_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB48_2: # %entry
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i64_1_2:
@@ -5120,23 +3665,11 @@ define i64 @test_i64_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
;
; RV64I-SFB-LABEL: test_i64_1_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -32
-; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a2
-; RV64I-SFB-NEXT: mv s1, a1
-; RV64I-SFB-NEXT: addi a0, a0, 32
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_8
-; RV64I-SFB-NEXT: bnez s1, .LBB48_2
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB48_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB48_2: # %entry
-; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 32
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i64_1_2:
@@ -5169,23 +3702,11 @@ define i64 @test_i64_1_2(ptr %base, i1 zeroext %x, i64 %b) nounwind {
;
; RV64I-SFBILOAD-LABEL: test_i64_1_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a2
-; RV64I-SFBILOAD-NEXT: mv s1, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_8
-; RV64I-SFBILOAD-NEXT: bnez s1, .LBB48_2
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB48_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB48_2: # %entry
-; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4
@@ -5197,215 +3718,80 @@ entry:
define i64 @test_i8_s_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_s_store_64_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s5, a1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB49_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 24
-; RV32I-NEXT: .LBB49_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB49_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB49_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_s_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB49_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai s0, a0, 56
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB49_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB49_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_s_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: mv s5, a1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: slli a0, a0, 24
-; RV32I-SFB-NEXT: beqz s5, .LBB49_2
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: beqz a1, .LBB49_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s4, a0, 24
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB49_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB49_4
+; RV32I-SFB-NEXT: beqz a1, .LBB49_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB49_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_s_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: slli a0, a0, 56
-; RV64I-SFB-NEXT: beqz s3, .LBB49_2
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB49_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 56
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB49_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_s_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: mv s5, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 24
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB49_2
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB49_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s4, a0, 24
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB49_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB49_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB49_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB49_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_s_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 56
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB49_2
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB49_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 56
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB49_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -5419,209 +3805,77 @@ entry:
define i64 @test_i8_z_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i8_z_store_64_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s3, a6
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: addi a0, a0, 4
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_1
-; RV32I-NEXT: sw s4, 0(s5)
-; RV32I-NEXT: sw s3, 4(s5)
-; RV32I-NEXT: beqz s2, .LBB50_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: zext.b s1, a0
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB50_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB50_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i8_z_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a0, a0, 4
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_1
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB50_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: zext.b s0, a0
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB50_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB50_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i8_z_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: mv s5, a1
-; RV32I-SFB-NEXT: addi a0, a0, 4
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_1
-; RV32I-SFB-NEXT: beqz s5, .LBB50_2
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB50_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB50_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB50_4
+; RV32I-SFB-NEXT: bnez a1, .LBB50_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: zext.b s4, a0
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB50_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i8_z_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a0, a0, 4
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_1
-; RV64I-SFB-NEXT: beqz s3, .LBB50_2
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB50_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: zext.b s2, a0
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB50_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i8_z_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: mv s5, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_1
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB50_2
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB50_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB50_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB50_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB50_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: zext.b s4, a0
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB50_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i8_z_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 4
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_1
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB50_2
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB50_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: zext.b s2, a0
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB50_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
@@ -5635,215 +3889,80 @@ entry:
define i64 @test_i16_s_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_s_store_64_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a6
-; RV32I-NEXT: mv s3, a5
-; RV32I-NEXT: mv s4, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s5, a1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s3, 0(s4)
-; RV32I-NEXT: sw s2, 4(s4)
-; RV32I-NEXT: beqz s5, .LBB51_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai s0, a0, 31
-; RV32I-NEXT: srai s1, a0, 16
-; RV32I-NEXT: .LBB51_2: # %entry
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB51_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB51_2:
+; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_s_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB51_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai s0, a0, 48
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB51_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB51_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_s_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: mv s5, a1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s5, .LBB51_2
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: beqz a1, .LBB51_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: srai s4, a0, 16
+; RV32I-SFB-NEXT: mv a2, a0
; RV32I-SFB-NEXT: .LBB51_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB51_4
+; RV32I-SFB-NEXT: beqz a1, .LBB51_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srai s3, a0, 31
+; RV32I-SFB-NEXT: srai a3, a0, 31
; RV32I-SFB-NEXT: .LBB51_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_s_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB51_2
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB51_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srai s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB51_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_s_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: mv s5, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB51_2
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB51_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: srai s4, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a2, a0
; RV32I-SFBILOAD-NEXT: .LBB51_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB51_4
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB51_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srai s3, a0, 31
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
; RV32I-SFBILOAD-NEXT: .LBB51_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_s_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB51_2
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB51_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srai s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB51_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
@@ -5857,215 +3976,77 @@ entry:
define i64 @test_i16_z_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i16_z_store_64_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s3, a6
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s1, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: addi a0, a0, 8
-; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: call __atomic_load_2
-; RV32I-NEXT: sw s4, 0(s5)
-; RV32I-NEXT: sw s3, 4(s5)
-; RV32I-NEXT: beqz s2, .LBB52_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli s1, a0, 16
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB52_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB52_2: # %entry
-; RV32I-NEXT: addi a1, s2, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i16_z_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a0, a0, 8
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_2
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: beqz s3, .LBB52_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli s0, a0, 48
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB52_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB52_2: # %entry
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i16_z_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: mv s5, a1
-; RV32I-SFB-NEXT: addi a0, a0, 8
-; RV32I-SFB-NEXT: li a1, 0
-; RV32I-SFB-NEXT: call __atomic_load_2
-; RV32I-SFB-NEXT: slli a0, a0, 16
-; RV32I-SFB-NEXT: beqz s5, .LBB52_2
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB52_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB52_2: # %entry
-; RV32I-SFB-NEXT: beqz s5, .LBB52_4
+; RV32I-SFB-NEXT: bnez a1, .LBB52_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: srli s4, a0, 16
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB52_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a0, s4
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i16_z_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a0, a0, 8
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_2
-; RV64I-SFB-NEXT: slli a0, a0, 48
-; RV64I-SFB-NEXT: beqz s3, .LBB52_2
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB52_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: srli s2, a0, 48
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB52_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: mv a0, s2
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i16_z_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: mv s5, a1
-; RV32I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV32I-SFBILOAD-NEXT: li a1, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load_2
-; RV32I-SFBILOAD-NEXT: slli a0, a0, 16
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB52_2
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB52_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB52_2: # %entry
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB52_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB52_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: srli s4, a0, 16
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB52_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a0, s4
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i16_z_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 8
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_2
-; RV64I-SFBILOAD-NEXT: slli a0, a0, 48
-; RV64I-SFBILOAD-NEXT: beqz s3, .LBB52_2
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB52_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: srli s2, a0, 48
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB52_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: mv a0, s2
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4
@@ -6079,225 +4060,81 @@ entry:
define i64 @test_i32_z_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i64 %c) nounwind {
; RV32I-LABEL: test_i32_z_store_64_2:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s3, a6
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: mv s2, a2
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: addi a1, a0, 16
-; RV32I-NEXT: li a0, 4
-; RV32I-NEXT: mv a2, sp
-; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: call __atomic_load
-; RV32I-NEXT: lw a0, 0(sp)
-; RV32I-NEXT: sw s4, 0(s5)
-; RV32I-NEXT: sw s3, 4(s5)
-; RV32I-NEXT: bnez s1, .LBB53_2
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: sw a5, 0(a4)
+; RV32I-NEXT: sw a6, 4(a4)
+; RV32I-NEXT: bnez a1, .LBB53_2
; RV32I-NEXT: # %bb.1: # %entry
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB53_2: # %entry
-; RV32I-NEXT: addi a1, s1, -1
-; RV32I-NEXT: and a1, a1, s0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_i32_z_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a1, a0, 16
-; RV64I-NEXT: li a0, 4
-; RV64I-NEXT: addi a2, sp, 4
-; RV64I-NEXT: li a3, 0
-; RV64I-NEXT: call __atomic_load
-; RV64I-NEXT: lwu a0, 4(sp)
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB53_2
+; RV64I-NEXT: lwu a0, 16(a0)
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB53_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB53_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i32_z_store_64_2:
; RV32I-SFB: # %bb.0: # %entry
-; RV32I-SFB-NEXT: addi sp, sp, -32
-; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFB-NEXT: mv s0, a6
-; RV32I-SFB-NEXT: mv s1, a5
-; RV32I-SFB-NEXT: mv s2, a4
-; RV32I-SFB-NEXT: mv s3, a3
-; RV32I-SFB-NEXT: mv s4, a2
-; RV32I-SFB-NEXT: mv s5, a1
-; RV32I-SFB-NEXT: addi a1, a0, 16
-; RV32I-SFB-NEXT: li a0, 4
-; RV32I-SFB-NEXT: mv a2, sp
-; RV32I-SFB-NEXT: li a3, 0
-; RV32I-SFB-NEXT: call __atomic_load
-; RV32I-SFB-NEXT: lw a0, 0(sp)
-; RV32I-SFB-NEXT: beqz s5, .LBB53_2
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB53_2
; RV32I-SFB-NEXT: # %bb.1: # %entry
-; RV32I-SFB-NEXT: li s3, 0
+; RV32I-SFB-NEXT: li a3, 0
; RV32I-SFB-NEXT: .LBB53_2: # %entry
-; RV32I-SFB-NEXT: bnez s5, .LBB53_4
+; RV32I-SFB-NEXT: bnez a1, .LBB53_4
; RV32I-SFB-NEXT: # %bb.3: # %entry
-; RV32I-SFB-NEXT: mv a0, s4
+; RV32I-SFB-NEXT: mv a0, a2
; RV32I-SFB-NEXT: .LBB53_4: # %entry
-; RV32I-SFB-NEXT: sw s1, 0(s2)
-; RV32I-SFB-NEXT: sw s0, 4(s2)
-; RV32I-SFB-NEXT: mv a1, s3
-; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: sw a5, 0(a4)
+; RV32I-SFB-NEXT: sw a6, 4(a4)
+; RV32I-SFB-NEXT: mv a1, a3
; RV32I-SFB-NEXT: ret
;
; RV64I-SFB-LABEL: test_i32_z_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a1, a0, 16
-; RV64I-SFB-NEXT: li a0, 4
-; RV64I-SFB-NEXT: addi a2, sp, 4
-; RV64I-SFB-NEXT: li a3, 0
-; RV64I-SFB-NEXT: call __atomic_load
-; RV64I-SFB-NEXT: lwu a0, 4(sp)
-; RV64I-SFB-NEXT: bnez s3, .LBB53_2
+; RV64I-SFB-NEXT: lwu a0, 16(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB53_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB53_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i32_z_store_64_2:
; RV32I-SFBILOAD: # %bb.0: # %entry
-; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
-; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-SFBILOAD-NEXT: mv s0, a6
-; RV32I-SFBILOAD-NEXT: mv s1, a5
-; RV32I-SFBILOAD-NEXT: mv s2, a4
-; RV32I-SFBILOAD-NEXT: mv s3, a3
-; RV32I-SFBILOAD-NEXT: mv s4, a2
-; RV32I-SFBILOAD-NEXT: mv s5, a1
-; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV32I-SFBILOAD-NEXT: li a0, 4
-; RV32I-SFBILOAD-NEXT: mv a2, sp
-; RV32I-SFBILOAD-NEXT: li a3, 0
-; RV32I-SFBILOAD-NEXT: call __atomic_load
-; RV32I-SFBILOAD-NEXT: lw a0, 0(sp)
-; RV32I-SFBILOAD-NEXT: beqz s5, .LBB53_2
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB53_2
; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV32I-SFBILOAD-NEXT: li s3, 0
+; RV32I-SFBILOAD-NEXT: li a3, 0
; RV32I-SFBILOAD-NEXT: .LBB53_2: # %entry
-; RV32I-SFBILOAD-NEXT: bnez s5, .LBB53_4
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB53_4
; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
-; RV32I-SFBILOAD-NEXT: mv a0, s4
+; RV32I-SFBILOAD-NEXT: mv a0, a2
; RV32I-SFBILOAD-NEXT: .LBB53_4: # %entry
-; RV32I-SFBILOAD-NEXT: sw s1, 0(s2)
-; RV32I-SFBILOAD-NEXT: sw s0, 4(s2)
-; RV32I-SFBILOAD-NEXT: mv a1, s3
-; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: sw a5, 0(a4)
+; RV32I-SFBILOAD-NEXT: sw a6, 4(a4)
+; RV32I-SFBILOAD-NEXT: mv a1, a3
; RV32I-SFBILOAD-NEXT: ret
;
; RV64I-SFBILOAD-LABEL: test_i32_z_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
-; RV64I-SFBILOAD-NEXT: li a0, 4
-; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
-; RV64I-SFBILOAD-NEXT: li a3, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load
-; RV64I-SFBILOAD-NEXT: lwu a0, 4(sp)
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB53_2
+; RV64I-SFBILOAD-NEXT: lwu a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB53_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB53_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4
- %val = load atomic i32, ptr %addr monotonic, align 2 ; load 32-bit value
+ %val = load atomic i32, ptr %addr monotonic, align 4 ; load 32-bit value
%ext = zext i32 %val to i64 ; zero-extend to 64 bits
store i64 %c, ptr %base1
%res = select i1 %x, i64 %ext, i64 %b
@@ -6343,30 +4180,12 @@ define i64 @test_i64_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i6
;
; RV64I-LABEL: test_i64_store_64_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s1, a4
-; RV64I-NEXT: mv s2, a3
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s3, a1
-; RV64I-NEXT: addi a0, a0, 32
-; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: call __atomic_load_8
-; RV64I-NEXT: sd s1, 0(s2)
-; RV64I-NEXT: bnez s3, .LBB54_2
+; RV64I-NEXT: ld a0, 32(a0)
+; RV64I-NEXT: sd a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB54_2
; RV64I-NEXT: # %bb.1: # %entry
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB54_2: # %entry
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV32I-SFB-LABEL: test_i64_store_64_2:
@@ -6410,30 +4229,12 @@ define i64 @test_i64_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i6
;
; RV64I-SFB-LABEL: test_i64_store_64_2:
; RV64I-SFB: # %bb.0: # %entry
-; RV64I-SFB-NEXT: addi sp, sp, -48
-; RV64I-SFB-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFB-NEXT: mv s0, a4
-; RV64I-SFB-NEXT: mv s1, a3
-; RV64I-SFB-NEXT: mv s2, a2
-; RV64I-SFB-NEXT: mv s3, a1
-; RV64I-SFB-NEXT: addi a0, a0, 32
-; RV64I-SFB-NEXT: li a1, 0
-; RV64I-SFB-NEXT: call __atomic_load_8
-; RV64I-SFB-NEXT: bnez s3, .LBB54_2
+; RV64I-SFB-NEXT: ld a0, 32(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB54_2
; RV64I-SFB-NEXT: # %bb.1: # %entry
-; RV64I-SFB-NEXT: mv a0, s2
+; RV64I-SFB-NEXT: mv a0, a2
; RV64I-SFB-NEXT: .LBB54_2: # %entry
-; RV64I-SFB-NEXT: sd s0, 0(s1)
-; RV64I-SFB-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFB-NEXT: addi sp, sp, 48
+; RV64I-SFB-NEXT: sd a4, 0(a3)
; RV64I-SFB-NEXT: ret
;
; RV32I-SFBILOAD-LABEL: test_i64_store_64_2:
@@ -6477,30 +4278,12 @@ define i64 @test_i64_store_64_2(ptr %base, i1 zeroext %x, i64 %b, ptr %base1, i6
;
; RV64I-SFBILOAD-LABEL: test_i64_store_64_2:
; RV64I-SFBILOAD: # %bb.0: # %entry
-; RV64I-SFBILOAD-NEXT: addi sp, sp, -48
-; RV64I-SFBILOAD-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-SFBILOAD-NEXT: mv s0, a4
-; RV64I-SFBILOAD-NEXT: mv s1, a3
-; RV64I-SFBILOAD-NEXT: mv s2, a2
-; RV64I-SFBILOAD-NEXT: mv s3, a1
-; RV64I-SFBILOAD-NEXT: addi a0, a0, 32
-; RV64I-SFBILOAD-NEXT: li a1, 0
-; RV64I-SFBILOAD-NEXT: call __atomic_load_8
-; RV64I-SFBILOAD-NEXT: bnez s3, .LBB54_2
+; RV64I-SFBILOAD-NEXT: ld a0, 32(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB54_2
; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
-; RV64I-SFBILOAD-NEXT: mv a0, s2
+; RV64I-SFBILOAD-NEXT: mv a0, a2
; RV64I-SFBILOAD-NEXT: .LBB54_2: # %entry
-; RV64I-SFBILOAD-NEXT: sd s0, 0(s1)
-; RV64I-SFBILOAD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-SFBILOAD-NEXT: addi sp, sp, 48
+; RV64I-SFBILOAD-NEXT: sd a4, 0(a3)
; RV64I-SFBILOAD-NEXT: ret
entry:
%addr = getelementptr i64, ptr %base, i64 4 ; compute base + 4
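For reference, the shape every test in this file exercises, written out as C++ (a sketch inferred from the IR above; the function body and the lowering comments are assumptions, not part of the patch):

#include <atomic>
#include <cstdint>

// Hypothetical C++ equivalent of test_i32_z_store_64_2: a relaxed atomic
// load from base+16, an unrelated 64-bit store, then a select on x. With
// natural alignment (the `align 4` fix above), the monotonic load lowers
// to a plain lw/lwu instead of a __atomic_load libcall, which is what
// lets the short-forward-branch passes fold the select.
uint64_t test_i32_z_store_64_2(std::atomic<uint32_t> *base, bool x,
                               uint64_t b, uint64_t *base1, uint64_t c) {
  uint32_t val = base[4].load(std::memory_order_relaxed); // lwu a0, 16(a0)
  *base1 = c;                                             // sd a4, 0(a3)
  return x ? static_cast<uint64_t>(val) : b;              // folded select
}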
>From 633230a1f3a9bca8c9ce4848d6da6fcce1fb5649 Mon Sep 17 00:00:00 2001
From: Harsh Chandel <hchandel at qti.qualcomm.com>
Date: Wed, 10 Dec 2025 09:50:37 +0530
Subject: [PATCH 11/11] fixup! Address comments
Change-Id: I24e728250783c8331d12882ab8198e3824c819b2
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 27b9f45877c40..489cf96f38750 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -897,7 +897,7 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
.addImm(0);
}
-unsigned getLoadPredicatedOpcode(unsigned Opcode) {
+static unsigned getLoadPredicatedOpcode(unsigned Opcode) {
switch (Opcode) {
case RISCV::LB:
return RISCV::PseudoCCLB;
@@ -941,11 +941,8 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
// Create a new predicated version of DefMI.
MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), InsertPt,
- MI.getDebugLoc(), get(PredOpc), DestReg);
-
- // Copy the condition portion.
- NewMI.add(MI.getOperand(1));
- NewMI.add(MI.getOperand(2));
+ MI.getDebugLoc(), get(PredOpc), DestReg)
+ .add({MI.getOperand(1), MI.getOperand(2)});
// Add condition code, inverting if necessary.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
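For context on this last hunk: MachineInstrBuilder::add also has an ArrayRef<MachineOperand> overload, so the braced list appends both condition operands in one call. A condensed sketch of the resulting fold (PredOpc, DestReg, and InsertPt come from the elided context; the comments are my reading of the visible hunk, not upstream documentation):

// Sketch only: MI is the conditional-move pseudo being folded; operands
// 1 and 2 are the compare inputs, and operand 3 carries the condition
// code, which the code just below this hunk decodes.
MachineInstrBuilder NewMI =
    BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(PredOpc),
            DestReg)
        .add({MI.getOperand(1), MI.getOperand(2)});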