[llvm] 37d0f20 - Revert "[RISCV] Add a pass to remove ADDI by reassociating to fold into load/store address. (#127151)"
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 19 11:58:12 PST 2025
Author: Craig Topper
Date: 2025-02-19T11:57:53-08:00
New Revision: 37d0f20593a65c552d717561efb64c8cf29c1d3c
URL: https://github.com/llvm/llvm-project/commit/37d0f20593a65c552d717561efb64c8cf29c1d3c
DIFF: https://github.com/llvm/llvm-project/commit/37d0f20593a65c552d717561efb64c8cf29c1d3c.diff
LOG: Revert "[RISCV] Add a pass to remove ADDI by reassociating to fold into load/store address. (#127151)"
This reverts commit c3ebbfd7368ec3e4737427eef602296a868a4ecd.
Seeing some test failures on the build bot.
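For context, the reverted pass rewrote sequences in which an ADDI's immediate could be pushed through scaled-index arithmetic and absorbed into a load/store offset. A minimal before/after sketch, reconstructed from the test_sh3add checks in the deleted fold-mem-offset.ll below (register names and constants are illustrative):

  # Before: the index adjustment is a separate ADDI.
  addi   a1, a1, 10      # index += 10
  sh3add a1, a1, a0      # a1 = a0 + (index << 3)
  ld     a0, 400(a1)     # load at base + 400
  # After: 10 << 3 = 80 is folded into the memory offset.
  sh3add a1, a1, a0
  ld     a0, 480(a1)

The fold was only legal while the combined offset remained a 12-bit immediate.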
Added:
Modified:
llvm/lib/Target/RISCV/CMakeLists.txt
llvm/lib/Target/RISCV/RISCV.h
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
llvm/test/CodeGen/RISCV/O3-pipeline.ll
llvm/test/CodeGen/RISCV/split-offsets.ll
llvm/test/CodeGen/RISCV/xtheadmemidx.ll
Removed:
llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp
llvm/test/CodeGen/RISCV/fold-mem-offset.ll
################################################################################
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 5d1ea50eba494..9b23a5ab521c8 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -37,7 +37,6 @@ add_llvm_target(RISCVCodeGen
RISCVMakeCompressible.cpp
RISCVExpandAtomicPseudoInsts.cpp
RISCVExpandPseudoInsts.cpp
- RISCVFoldMemOffset.cpp
RISCVFrameLowering.cpp
RISCVGatherScatterLowering.cpp
RISCVIndirectBranchTracking.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 641e2eb4094f9..851eea1352852 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -52,9 +52,6 @@ void initializeRISCVVectorPeepholePass(PassRegistry &);
FunctionPass *createRISCVOptWInstrsPass();
void initializeRISCVOptWInstrsPass(PassRegistry &);
-FunctionPass *createRISCVFoldMemOffsetPass();
-void initializeRISCVFoldMemOffsetPass(PassRegistry &);
-
FunctionPass *createRISCVMergeBaseOffsetOptPass();
void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp b/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp
deleted file mode 100644
index 989e9d859d64f..0000000000000
--- a/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp
+++ /dev/null
@@ -1,282 +0,0 @@
-//===- RISCVFoldMemOffset.cpp - Fold ADDI into memory offsets ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===---------------------------------------------------------------------===//
-//
-// Look for ADDIs that can be removed by folding their immediate into later
-// load/store addresses. There may be other arithmetic instructions between the
-// addi and the load/store that we need to reassociate through. If the final
-// result of the arithmetic is only used by load/store addresses, we can fold
-// the offset into all of the loads/stores as long as it doesn't create an
-// offset that is too large.
-//
-//===---------------------------------------------------------------------===//
-
-#include "RISCV.h"
-#include "RISCVSubtarget.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include <queue>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "riscv-fold-mem-offset"
-#define RISCV_FOLD_MEM_OFFSET_NAME "RISC-V Fold Memory Offset"
-
-namespace {
-
-class RISCVFoldMemOffset : public MachineFunctionPass {
-public:
- static char ID;
-
- RISCVFoldMemOffset() : MachineFunctionPass(ID) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- bool foldOffset(Register OrigReg, int64_t InitialOffset,
- const MachineRegisterInfo &MRI,
- DenseMap<MachineInstr *, int64_t> &FoldableInstrs);
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- StringRef getPassName() const override { return RISCV_FOLD_MEM_OFFSET_NAME; }
-};
-
-// Wrapper class around a std::optional to allow accumulation.
-class FoldableOffset {
- std::optional<int64_t> Offset;
-
-public:
- bool hasValue() const { return Offset.has_value(); }
- int64_t getValue() const { return *Offset; }
-
- FoldableOffset &operator=(int64_t RHS) {
- Offset = RHS;
- return *this;
- }
-
- FoldableOffset &operator+=(int64_t RHS) {
- if (!Offset)
- Offset = 0;
- Offset = (uint64_t)*Offset + (uint64_t)RHS;
- return *this;
- }
-
- int64_t operator*() { return *Offset; }
-};
-
-} // end anonymous namespace
-
-char RISCVFoldMemOffset::ID = 0;
-INITIALIZE_PASS(RISCVFoldMemOffset, DEBUG_TYPE, RISCV_FOLD_MEM_OFFSET_NAME,
- false, false)
-
-FunctionPass *llvm::createRISCVFoldMemOffsetPass() {
- return new RISCVFoldMemOffset();
-}
-
-// Walk forward from the ADDI looking for arithmetic instructions we can
-// analyze or memory instructions that use it as part of their address
-// calculation. For each arithmetic instruction, we look up how the offset
-// contributes to the value in that register and use that information to
-// calculate the contribution to the output of this instruction.
-// Only addition and left shift are supported.
-// FIXME: Add multiplication by constant. The constant will be in a register.
-bool RISCVFoldMemOffset::foldOffset(
- Register OrigReg, int64_t InitialOffset, const MachineRegisterInfo &MRI,
- DenseMap<MachineInstr *, int64_t> &FoldableInstrs) {
- // Map to hold how much the offset contributes to the value of this register.
- DenseMap<Register, int64_t> RegToOffsetMap;
-
- // Insert root offset into the map.
- RegToOffsetMap[OrigReg] = InitialOffset;
-
- std::queue<Register> Worklist;
- Worklist.push(OrigReg);
-
- while (!Worklist.empty()) {
- Register Reg = Worklist.front();
- Worklist.pop();
-
- if (!Reg.isVirtual())
- return false;
-
- for (auto &User : MRI.use_nodbg_instructions(Reg)) {
- FoldableOffset Offset;
-
- switch (User.getOpcode()) {
- default:
- return false;
- case RISCV::ADD:
- if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
- I != RegToOffsetMap.end())
- Offset = I->second;
- if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
- I != RegToOffsetMap.end())
- Offset += I->second;
- break;
- case RISCV::SH1ADD:
- if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
- I != RegToOffsetMap.end())
- Offset = (uint64_t)I->second << 1;
- if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
- I != RegToOffsetMap.end())
- Offset += I->second;
- break;
- case RISCV::SH2ADD:
- if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
- I != RegToOffsetMap.end())
- Offset = (uint64_t)I->second << 2;
- if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
- I != RegToOffsetMap.end())
- Offset += I->second;
- break;
- case RISCV::SH3ADD:
- if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
- I != RegToOffsetMap.end())
- Offset = (uint64_t)I->second << 3;
- if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
- I != RegToOffsetMap.end())
- Offset += I->second;
- break;
- case RISCV::ADD_UW:
- case RISCV::SH1ADD_UW:
- case RISCV::SH2ADD_UW:
- case RISCV::SH3ADD_UW:
- // Don't fold through the zero extended input.
- if (User.getOperand(1).getReg() == Reg)
- return false;
- if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
- I != RegToOffsetMap.end())
- Offset = I->second;
- break;
- case RISCV::SLLI: {
- unsigned ShAmt = User.getOperand(2).getImm();
- if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
- I != RegToOffsetMap.end())
- Offset = (uint64_t)I->second << ShAmt;
- break;
- }
- case RISCV::LB:
- case RISCV::LBU:
- case RISCV::SB:
- case RISCV::LH:
- case RISCV::LH_INX:
- case RISCV::LHU:
- case RISCV::FLH:
- case RISCV::SH:
- case RISCV::SH_INX:
- case RISCV::FSH:
- case RISCV::LW:
- case RISCV::LW_INX:
- case RISCV::LWU:
- case RISCV::FLW:
- case RISCV::SW:
- case RISCV::SW_INX:
- case RISCV::FSW:
- case RISCV::LD:
- case RISCV::FLD:
- case RISCV::SD:
- case RISCV::FSD: {
- // Can't fold into store value.
- if (User.getOperand(0).getReg() == Reg)
- return false;
-
- // Existing offset must be immediate.
- if (!User.getOperand(2).isImm())
- return false;
-
- // Require at least one operation between the ADDI and the load/store.
- // We have other optimizations that should handle the simple case.
- if (User.getOperand(1).getReg() == OrigReg)
- return false;
-
- auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
- if (I == RegToOffsetMap.end())
- return false;
-
- int64_t LocalOffset = User.getOperand(2).getImm();
- assert(isInt<12>(LocalOffset));
- int64_t CombinedOffset = (uint64_t)LocalOffset + (uint64_t)I->second;
- if (!isInt<12>(CombinedOffset))
- return false;
-
- FoldableInstrs[&User] = CombinedOffset;
- continue;
- }
- }
-
- // If we reach here we should have an accumulated offset.
- assert(Offset.hasValue() && "Expected an offset");
-
- // If the offset is new or changed, add the destination register to the
- // work list.
- int64_t OffsetVal = Offset.getValue();
- auto P =
- RegToOffsetMap.try_emplace(User.getOperand(0).getReg(), OffsetVal);
- if (P.second) {
- Worklist.push(User.getOperand(0).getReg());
- } else if (P.first->second != OffsetVal) {
- P.first->second = OffsetVal;
- Worklist.push(User.getOperand(0).getReg());
- }
- }
- }
-
- return true;
-}
-
-bool RISCVFoldMemOffset::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
-
- // This optimization may increase size by preventing compression.
- if (MF.getFunction().hasOptSize())
- return false;
-
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- bool MadeChange = false;
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
- // FIXME: We can support ADDIW from an LUI+ADDIW pair if the result is
- // equivalent to LUI+ADDI.
- if (MI.getOpcode() != RISCV::ADDI)
- continue;
-
- // We only want to optimize register ADDIs.
- if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
- continue;
-
- // Ignore 'li'.
- if (MI.getOperand(1).getReg() == RISCV::X0)
- continue;
-
- int64_t Offset = MI.getOperand(2).getImm();
- assert(isInt<12>(Offset));
-
- DenseMap<MachineInstr *, int64_t> FoldableInstrs;
-
- if (!foldOffset(MI.getOperand(0).getReg(), Offset, MRI, FoldableInstrs))
- continue;
-
- if (FoldableInstrs.empty())
- continue;
-
- // We can fold this ADDI.
- // Rewrite all the instructions.
- for (auto [MemMI, NewOffset] : FoldableInstrs)
- MemMI->getOperand(2).setImm(NewOffset);
-
- MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
- MI.eraseFromParent();
- MadeChange = true;
- }
- }
-
- return MadeChange;
-}
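To make foldOffset's bookkeeping concrete: RegToOffsetMap records, for each register reachable from the ADDI, how much of the original immediate has flowed into its value, and the isInt<12> check above rejects folds whose combined offset no longer fits a load/store immediate. A hypothetical rejected trace (registers and constants invented for illustration):

  addi   a1, a1, 2047    # candidate ADDI, InitialOffset = 2047
  sh3add a2, a1, a0      # contribution scales to 2047 << 3 = 16376
  ld     a3, 0(a2)       # 0 + 16376 fails isInt<12>, so no fold
  # foldOffset returns false and the ADDI is left in place.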
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 89e017807363b..167dbb53c5950 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -133,7 +133,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVPostRAExpandPseudoPass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVOptWInstrsPass(*PR);
- initializeRISCVFoldMemOffsetPass(*PR);
initializeRISCVPreRAExpandPseudoPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVVectorPeepholePass(*PR);
@@ -591,7 +590,6 @@ void RISCVPassConfig::addMachineSSAOptimization() {
addPass(createRISCVVectorPeepholePass());
// TODO: Move this to pre regalloc
addPass(createRISCVVMV0EliminationPass());
- addPass(createRISCVFoldMemOffsetPass());
TargetPassConfig::addMachineSSAOptimization();
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 194223eee69eb..2646dfeca4eb6 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -98,7 +98,6 @@
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: RISC-V Vector Peephole Optimization
; CHECK-NEXT: RISC-V VMV0 Elimination
-; CHECK-NEXT: RISC-V Fold Memory Offset
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Early Tail Duplication
; CHECK-NEXT: Optimize machine instruction PHIs
diff --git a/llvm/test/CodeGen/RISCV/fold-mem-offset.ll b/llvm/test/CodeGen/RISCV/fold-mem-offset.ll
deleted file mode 100644
index b12fa509b0bea..0000000000000
--- a/llvm/test/CodeGen/RISCV/fold-mem-offset.ll
+++ /dev/null
@@ -1,733 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 | FileCheck %s --check-prefixes=CHECK,RV32I
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=CHECK,RV64I
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zba | FileCheck %s --check-prefixes=ZBA,RV32ZBA
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zba | FileCheck %s --check-prefixes=ZBA,RV64ZBA
-
-define i64 @test_sh3add(ptr %p, iXLen %x, iXLen %y) {
-; RV32I-LABEL: test_sh3add:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: slli a1, a1, 3
-; RV32I-NEXT: slli a2, a2, 3
-; RV32I-NEXT: add a1, a1, a0
-; RV32I-NEXT: add a0, a0, a2
-; RV32I-NEXT: lw a2, 480(a1)
-; RV32I-NEXT: lw a1, 484(a1)
-; RV32I-NEXT: lw a3, 400(a0)
-; RV32I-NEXT: lw a0, 404(a0)
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: add a0, a3, a2
-; RV32I-NEXT: sltu a2, a0, a3
-; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_sh3add:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: slli a1, a1, 3
-; RV64I-NEXT: slli a2, a2, 3
-; RV64I-NEXT: add a1, a1, a0
-; RV64I-NEXT: add a0, a0, a2
-; RV64I-NEXT: ld a1, 480(a1)
-; RV64I-NEXT: ld a0, 400(a0)
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_sh3add:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: sh3add a1, a1, a0
-; RV32ZBA-NEXT: sh3add a0, a2, a0
-; RV32ZBA-NEXT: lw a2, 480(a1)
-; RV32ZBA-NEXT: lw a1, 484(a1)
-; RV32ZBA-NEXT: lw a3, 400(a0)
-; RV32ZBA-NEXT: lw a0, 404(a0)
-; RV32ZBA-NEXT: add a1, a0, a1
-; RV32ZBA-NEXT: add a0, a3, a2
-; RV32ZBA-NEXT: sltu a2, a0, a3
-; RV32ZBA-NEXT: add a1, a1, a2
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_sh3add:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: sh3add a1, a1, a0
-; RV64ZBA-NEXT: sh3add a0, a2, a0
-; RV64ZBA-NEXT: ld a1, 480(a1)
-; RV64ZBA-NEXT: ld a0, 400(a0)
-; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: ret
-entry:
- %b = getelementptr inbounds nuw i8, ptr %p, i64 400
- %add = add iXLen %x, 10
- %arrayidx = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, iXLen %add
- %0 = load i64, ptr %arrayidx, align 8
- %arrayidx2 = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, iXLen %y
- %1 = load i64, ptr %arrayidx2, align 8
- %add3 = add nsw i64 %1, %0
- ret i64 %add3
-}
-
-define signext i32 @test_sh2add(ptr %p, iXLen %x, iXLen %y) {
-; RV32I-LABEL: test_sh2add:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: add a0, a2, a0
-; RV32I-NEXT: lw a1, 1200(a1)
-; RV32I-NEXT: lw a0, 1240(a0)
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_sh2add:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: slli a1, a1, 2
-; RV64I-NEXT: slli a2, a2, 2
-; RV64I-NEXT: add a1, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: lw a1, 1200(a1)
-; RV64I-NEXT: lw a0, 1240(a0)
-; RV64I-NEXT: addw a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_sh2add:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: sh2add a1, a1, a0
-; RV32ZBA-NEXT: sh2add a0, a2, a0
-; RV32ZBA-NEXT: lw a1, 1200(a1)
-; RV32ZBA-NEXT: lw a0, 1240(a0)
-; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_sh2add:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: sh2add a1, a1, a0
-; RV64ZBA-NEXT: sh2add a0, a2, a0
-; RV64ZBA-NEXT: lw a1, 1200(a1)
-; RV64ZBA-NEXT: lw a0, 1240(a0)
-; RV64ZBA-NEXT: addw a0, a0, a1
-; RV64ZBA-NEXT: ret
-entry:
- %c = getelementptr inbounds nuw i8, ptr %p, i64 1200
- %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %x
- %0 = load i32, ptr %arrayidx, align 4
- %add = add iXLen %y, 10
- %arrayidx2 = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %add
- %1 = load i32, ptr %arrayidx2, align 4
- %add3 = add nsw i32 %1, %0
- ret i32 %add3
-}
-
-define signext i16 @test_sh1add(ptr %p, iXLen %x, iXLen %y) {
-; RV32I-LABEL: test_sh1add:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: slli a2, a2, 1
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: add a0, a2, a0
-; RV32I-NEXT: lh a1, 1600(a1)
-; RV32I-NEXT: lh a0, 1620(a0)
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai a0, a0, 16
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_sh1add:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: slli a2, a2, 1
-; RV64I-NEXT: add a1, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: lh a1, 1600(a1)
-; RV64I-NEXT: lh a0, 1620(a0)
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_sh1add:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: sh1add a1, a1, a0
-; RV32ZBA-NEXT: sh1add a0, a2, a0
-; RV32ZBA-NEXT: lh a1, 1600(a1)
-; RV32ZBA-NEXT: lh a0, 1620(a0)
-; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: slli a0, a0, 16
-; RV32ZBA-NEXT: srai a0, a0, 16
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_sh1add:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: sh1add a1, a1, a0
-; RV64ZBA-NEXT: sh1add a0, a2, a0
-; RV64ZBA-NEXT: lh a1, 1600(a1)
-; RV64ZBA-NEXT: lh a0, 1620(a0)
-; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: slli a0, a0, 48
-; RV64ZBA-NEXT: srai a0, a0, 48
-; RV64ZBA-NEXT: ret
-entry:
- %d = getelementptr inbounds nuw i8, ptr %p, i64 1600
- %arrayidx = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, iXLen %x
- %0 = load i16, ptr %arrayidx, align 2
- %add = add iXLen %y, 10
- %arrayidx2 = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, iXLen %add
- %1 = load i16, ptr %arrayidx2, align 2
- %add4 = add i16 %1, %0
- ret i16 %add4
-}
-
-define zeroext i8 @test_add(ptr %p, iXLen %x, iXLen %y) {
-; CHECK-LABEL: test_add:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: add a1, a0, a1
-; CHECK-NEXT: add a0, a2, a0
-; CHECK-NEXT: lbu a1, 1800(a1)
-; CHECK-NEXT: lbu a0, 1810(a0)
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: ret
-;
-; ZBA-LABEL: test_add:
-; ZBA: # %bb.0: # %entry
-; ZBA-NEXT: add a1, a0, a1
-; ZBA-NEXT: add a0, a2, a0
-; ZBA-NEXT: lbu a1, 1800(a1)
-; ZBA-NEXT: lbu a0, 1810(a0)
-; ZBA-NEXT: add a0, a0, a1
-; ZBA-NEXT: andi a0, a0, 255
-; ZBA-NEXT: ret
-entry:
- %e = getelementptr inbounds nuw i8, ptr %p, i64 1800
- %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %x
- %0 = load i8, ptr %arrayidx, align 1
- %add = add iXLen %y, 10
- %arrayidx2 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %add
- %1 = load i8, ptr %arrayidx2, align 1
- %add4 = add i8 %1, %0
- ret i8 %add4
-}
-
-define i64 @test_sh3add_uw(ptr %p, i32 signext %x, i32 signext %y) {
-; RV32I-LABEL: test_sh3add_uw:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: slli a1, a1, 3
-; RV32I-NEXT: slli a2, a2, 3
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: add a0, a0, a2
-; RV32I-NEXT: lw a2, 400(a1)
-; RV32I-NEXT: lw a1, 404(a1)
-; RV32I-NEXT: lw a3, 400(a0)
-; RV32I-NEXT: lw a0, 404(a0)
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: add a0, a3, a2
-; RV32I-NEXT: sltu a2, a0, a3
-; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_sh3add_uw:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: srli a1, a1, 29
-; RV64I-NEXT: srli a2, a2, 29
-; RV64I-NEXT: add a1, a0, a1
-; RV64I-NEXT: add a0, a0, a2
-; RV64I-NEXT: ld a1, 400(a1)
-; RV64I-NEXT: ld a0, 400(a0)
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_sh3add_uw:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: sh3add a1, a1, a0
-; RV32ZBA-NEXT: sh3add a0, a2, a0
-; RV32ZBA-NEXT: lw a2, 400(a1)
-; RV32ZBA-NEXT: lw a1, 404(a1)
-; RV32ZBA-NEXT: lw a3, 400(a0)
-; RV32ZBA-NEXT: lw a0, 404(a0)
-; RV32ZBA-NEXT: add a1, a0, a1
-; RV32ZBA-NEXT: add a0, a3, a2
-; RV32ZBA-NEXT: sltu a2, a0, a3
-; RV32ZBA-NEXT: add a1, a1, a2
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_sh3add_uw:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: sh3add.uw a1, a1, a0
-; RV64ZBA-NEXT: sh3add.uw a0, a2, a0
-; RV64ZBA-NEXT: ld a1, 400(a1)
-; RV64ZBA-NEXT: ld a0, 400(a0)
-; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: ret
-entry:
- %b = getelementptr inbounds nuw i8, ptr %p, i64 400
- %idxprom = zext i32 %x to i64
- %arrayidx = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, i64 %idxprom
- %0 = load i64, ptr %arrayidx, align 8
- %idxprom2 = zext i32 %y to i64
- %arrayidx3 = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, i64 %idxprom2
- %1 = load i64, ptr %arrayidx3, align 8
- %add4 = add nsw i64 %1, %0
- ret i64 %add4
-}
-
-define signext i32 @test_sh2add_uw(ptr %p, i32 signext %x, i32 signext %y) {
-; RV32I-LABEL: test_sh2add_uw:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: add a0, a0, a2
-; RV32I-NEXT: lw a1, 1200(a1)
-; RV32I-NEXT: lw a0, 1200(a0)
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_sh2add_uw:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: srli a1, a1, 30
-; RV64I-NEXT: srli a2, a2, 30
-; RV64I-NEXT: add a1, a0, a1
-; RV64I-NEXT: add a0, a0, a2
-; RV64I-NEXT: lw a1, 1200(a1)
-; RV64I-NEXT: lw a0, 1200(a0)
-; RV64I-NEXT: addw a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_sh2add_uw:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: sh2add a1, a1, a0
-; RV32ZBA-NEXT: sh2add a0, a2, a0
-; RV32ZBA-NEXT: lw a1, 1200(a1)
-; RV32ZBA-NEXT: lw a0, 1200(a0)
-; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_sh2add_uw:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: sh2add.uw a1, a1, a0
-; RV64ZBA-NEXT: sh2add.uw a0, a2, a0
-; RV64ZBA-NEXT: lw a1, 1200(a1)
-; RV64ZBA-NEXT: lw a0, 1200(a0)
-; RV64ZBA-NEXT: addw a0, a0, a1
-; RV64ZBA-NEXT: ret
-entry:
- %c = getelementptr inbounds nuw i8, ptr %p, i64 1200
- %idxprom = zext i32 %x to i64
- %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, i64 %idxprom
- %0 = load i32, ptr %arrayidx, align 4
- %idxprom2 = zext i32 %y to i64
- %arrayidx3 = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, i64 %idxprom2
- %1 = load i32, ptr %arrayidx3, align 4
- %add4 = add nsw i32 %1, %0
- ret i32 %add4
-}
-
-define signext i16 @test_sh1add_uw(ptr %p, i32 signext %x, i32 signext %y) {
-; RV32I-LABEL: test_sh1add_uw:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: slli a2, a2, 1
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: add a0, a2, a0
-; RV32I-NEXT: lh a1, 1600(a1)
-; RV32I-NEXT: lh a0, 1620(a0)
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai a0, a0, 16
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_sh1add_uw:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: addi a2, a2, 10
-; RV64I-NEXT: srli a1, a1, 31
-; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: add a1, a0, a1
-; RV64I-NEXT: srli a2, a2, 31
-; RV64I-NEXT: add a0, a0, a2
-; RV64I-NEXT: lh a1, 1600(a1)
-; RV64I-NEXT: lh a0, 1600(a0)
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_sh1add_uw:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: sh1add a1, a1, a0
-; RV32ZBA-NEXT: sh1add a0, a2, a0
-; RV32ZBA-NEXT: lh a1, 1600(a1)
-; RV32ZBA-NEXT: lh a0, 1620(a0)
-; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: slli a0, a0, 16
-; RV32ZBA-NEXT: srai a0, a0, 16
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_sh1add_uw:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: sh1add.uw a1, a1, a0
-; RV64ZBA-NEXT: addi a2, a2, 10
-; RV64ZBA-NEXT: sh1add.uw a0, a2, a0
-; RV64ZBA-NEXT: lh a1, 1600(a1)
-; RV64ZBA-NEXT: lh a0, 1600(a0)
-; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: slli a0, a0, 48
-; RV64ZBA-NEXT: srai a0, a0, 48
-; RV64ZBA-NEXT: ret
-entry:
- %d = getelementptr inbounds nuw i8, ptr %p, i64 1600
- %idxprom = zext i32 %x to i64
- %arrayidx = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, i64 %idxprom
- %0 = load i16, ptr %arrayidx, align 2
- %add = add i32 %y, 10
- %idxprom2 = zext i32 %add to i64
- %arrayidx3 = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, i64 %idxprom2
- %1 = load i16, ptr %arrayidx3, align 2
- %add5 = add i16 %1, %0
- ret i16 %add5
-}
-
-define zeroext i8 @test_add_uw(ptr %p, i32 signext %x, i32 signext %y) {
-; RV32I-LABEL: test_add_uw:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: add a0, a0, a2
-; RV32I-NEXT: lbu a1, 1800(a1)
-; RV32I-NEXT: lbu a0, 1800(a0)
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_add_uw:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: srli a1, a1, 32
-; RV64I-NEXT: srli a2, a2, 32
-; RV64I-NEXT: add a1, a0, a1
-; RV64I-NEXT: add a0, a0, a2
-; RV64I-NEXT: lbu a1, 1800(a1)
-; RV64I-NEXT: lbu a0, 1800(a0)
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: andi a0, a0, 255
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_add_uw:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: add a1, a0, a1
-; RV32ZBA-NEXT: add a0, a0, a2
-; RV32ZBA-NEXT: lbu a1, 1800(a1)
-; RV32ZBA-NEXT: lbu a0, 1800(a0)
-; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: andi a0, a0, 255
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_add_uw:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: add.uw a1, a1, a0
-; RV64ZBA-NEXT: add.uw a0, a2, a0
-; RV64ZBA-NEXT: lbu a1, 1800(a1)
-; RV64ZBA-NEXT: lbu a0, 1800(a0)
-; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: andi a0, a0, 255
-; RV64ZBA-NEXT: ret
-entry:
- %e = getelementptr inbounds nuw i8, ptr %p, i64 1800
- %idxprom = zext i32 %x to i64
- %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, i64 %idxprom
- %0 = load i8, ptr %arrayidx, align 1
- %idxprom2 = zext i32 %y to i64
- %arrayidx3 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, i64 %idxprom2
- %1 = load i8, ptr %arrayidx3, align 1
- %add5 = add i8 %1, %0
- ret i8 %add5
-}
-
-; The addi is part of the index and used with 2 different scales.
-define signext i32 @test_scaled_index_addi(ptr %p, iXLen %x) {
-; RV32I-LABEL: test_scaled_index_addi:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: slli a2, a1, 2
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: add a2, a0, a2
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lw a1, 1196(a2)
-; RV32I-NEXT: lh a0, 1598(a0)
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_scaled_index_addi:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: slli a2, a1, 2
-; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: add a2, a0, a2
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lw a1, 1196(a2)
-; RV64I-NEXT: lh a0, 1598(a0)
-; RV64I-NEXT: addw a0, a1, a0
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_scaled_index_addi:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: sh2add a2, a1, a0
-; RV32ZBA-NEXT: sh1add a0, a1, a0
-; RV32ZBA-NEXT: lw a1, 1196(a2)
-; RV32ZBA-NEXT: lh a0, 1598(a0)
-; RV32ZBA-NEXT: add a0, a1, a0
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_scaled_index_addi:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: sh2add a2, a1, a0
-; RV64ZBA-NEXT: sh1add a0, a1, a0
-; RV64ZBA-NEXT: lw a1, 1196(a2)
-; RV64ZBA-NEXT: lh a0, 1598(a0)
-; RV64ZBA-NEXT: addw a0, a1, a0
-; RV64ZBA-NEXT: ret
-entry:
- %c = getelementptr inbounds nuw i8, ptr %p, i64 1200
- %sub = add iXLen %x, -1
- %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %sub
- %0 = load i32, ptr %arrayidx, align 4
- %d = getelementptr inbounds nuw i8, ptr %p, i64 1600
- %arrayidx2 = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, iXLen %sub
- %1 = load i16, ptr %arrayidx2, align 2
- %conv = sext i16 %1 to i32
- %add = add nsw i32 %0, %conv
- ret i32 %add
-}
-
-; Offset is a pair of addis. We can fold one of them.
-define signext i32 @test_medium_offset(ptr %p, iXLen %x, iXLen %y) {
-; RV32I-LABEL: test_medium_offset:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi a0, a0, 2047
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: add a0, a2, a0
-; RV32I-NEXT: lw a1, 753(a1)
-; RV32I-NEXT: lw a0, 793(a0)
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_medium_offset:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi a0, a0, 2047
-; RV64I-NEXT: slli a1, a1, 2
-; RV64I-NEXT: slli a2, a2, 2
-; RV64I-NEXT: add a1, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: lw a1, 753(a1)
-; RV64I-NEXT: lw a0, 793(a0)
-; RV64I-NEXT: addw a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_medium_offset:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: addi a0, a0, 2047
-; RV32ZBA-NEXT: sh2add a1, a1, a0
-; RV32ZBA-NEXT: sh2add a0, a2, a0
-; RV32ZBA-NEXT: lw a1, 753(a1)
-; RV32ZBA-NEXT: lw a0, 793(a0)
-; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_medium_offset:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: addi a0, a0, 2047
-; RV64ZBA-NEXT: sh2add a1, a1, a0
-; RV64ZBA-NEXT: sh2add a0, a2, a0
-; RV64ZBA-NEXT: lw a1, 753(a1)
-; RV64ZBA-NEXT: lw a0, 793(a0)
-; RV64ZBA-NEXT: addw a0, a0, a1
-; RV64ZBA-NEXT: ret
-entry:
- %f = getelementptr inbounds nuw i8, ptr %p, i64 2800
- %arrayidx = getelementptr inbounds nuw [1000 x i32], ptr %f, i64 0, iXLen %x
- %0 = load i32, ptr %arrayidx, align 4
- %add = add iXLen %y, 10
- %arrayidx2 = getelementptr inbounds nuw [1000 x i32], ptr %f, i64 0, iXLen %add
- %1 = load i32, ptr %arrayidx2, align 4
- %add3 = add nsw i32 %1, %0
- ret i32 %add3
-}
-
-; Offset is a lui+addiw. We can't fold this on RV64.
-define signext i32 @test_large_offset(ptr %p, iXLen %x, iXLen %y) {
-; RV32I-LABEL: test_large_offset:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lui a3, 2
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: slli a2, a2, 2
-; RV32I-NEXT: add a0, a0, a3
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: add a0, a2, a0
-; RV32I-NEXT: lw a1, -1392(a1)
-; RV32I-NEXT: lw a0, -1352(a0)
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_large_offset:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lui a3, 2
-; RV64I-NEXT: slli a1, a1, 2
-; RV64I-NEXT: slli a2, a2, 2
-; RV64I-NEXT: addiw a3, a3, -1392
-; RV64I-NEXT: add a0, a0, a3
-; RV64I-NEXT: add a1, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: lw a1, 0(a1)
-; RV64I-NEXT: lw a0, 40(a0)
-; RV64I-NEXT: addw a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_large_offset:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: li a3, 1700
-; RV32ZBA-NEXT: sh2add a0, a3, a0
-; RV32ZBA-NEXT: sh2add a1, a1, a0
-; RV32ZBA-NEXT: sh2add a0, a2, a0
-; RV32ZBA-NEXT: lw a1, 0(a1)
-; RV32ZBA-NEXT: lw a0, 40(a0)
-; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_large_offset:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: li a3, 1700
-; RV64ZBA-NEXT: sh2add a0, a3, a0
-; RV64ZBA-NEXT: sh2add a1, a1, a0
-; RV64ZBA-NEXT: sh2add a0, a2, a0
-; RV64ZBA-NEXT: lw a1, 0(a1)
-; RV64ZBA-NEXT: lw a0, 40(a0)
-; RV64ZBA-NEXT: addw a0, a0, a1
-; RV64ZBA-NEXT: ret
-entry:
- %g = getelementptr inbounds nuw i8, ptr %p, i64 6800
- %arrayidx = getelementptr inbounds nuw [200 x i32], ptr %g, i64 0, iXLen %x
- %0 = load i32, ptr %arrayidx, align 4
- %add = add iXLen %y, 10
- %arrayidx2 = getelementptr inbounds nuw [200 x i32], ptr %g, i64 0, iXLen %add
- %1 = load i32, ptr %arrayidx2, align 4
- %add3 = add nsw i32 %1, %0
- ret i32 %add3
-}
-
-; After folding we can CSE the sh2add
-define signext i32 @test_cse(ptr %p, iXLen %x) {
-; RV32I-LABEL: test_cse:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: slli a1, a1, 2
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lw a1, 1200(a0)
-; RV32I-NEXT: addi a0, a0, 2047
-; RV32I-NEXT: lw a0, 753(a0)
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: test_cse:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: slli a1, a1, 2
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lw a1, 1200(a0)
-; RV64I-NEXT: addi a0, a0, 2047
-; RV64I-NEXT: lw a0, 753(a0)
-; RV64I-NEXT: addw a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBA-LABEL: test_cse:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: sh2add a0, a1, a0
-; RV32ZBA-NEXT: lw a1, 1200(a0)
-; RV32ZBA-NEXT: addi a0, a0, 2047
-; RV32ZBA-NEXT: lw a0, 753(a0)
-; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: ret
-;
-; RV64ZBA-LABEL: test_cse:
-; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: sh2add a0, a1, a0
-; RV64ZBA-NEXT: lw a1, 1200(a0)
-; RV64ZBA-NEXT: addi a0, a0, 2047
-; RV64ZBA-NEXT: lw a0, 753(a0)
-; RV64ZBA-NEXT: addw a0, a0, a1
-; RV64ZBA-NEXT: ret
-entry:
- %c = getelementptr inbounds nuw i8, ptr %p, i64 1200
- %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %x
- %0 = load i32, ptr %arrayidx, align 4
- %f = getelementptr inbounds nuw i8, ptr %p, i64 2800
- %arrayidx1 = getelementptr inbounds nuw [1000 x i32], ptr %f, i64 0, iXLen %x
- %1 = load i32, ptr %arrayidx1, align 4
- %add = add nsw i32 %1, %0
- ret i32 %add
-}
-
-define zeroext i8 @test_optsize(ptr %p, iXLen %x, iXLen %y) optsize {
-; CHECK-LABEL: test_optsize:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi a0, a0, 1800
-; CHECK-NEXT: add a1, a0, a1
-; CHECK-NEXT: add a0, a2, a0
-; CHECK-NEXT: lbu a1, 0(a1)
-; CHECK-NEXT: lbu a0, 10(a0)
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: ret
-;
-; ZBA-LABEL: test_optsize:
-; ZBA: # %bb.0: # %entry
-; ZBA-NEXT: addi a0, a0, 1800
-; ZBA-NEXT: add a1, a0, a1
-; ZBA-NEXT: add a0, a2, a0
-; ZBA-NEXT: lbu a1, 0(a1)
-; ZBA-NEXT: lbu a0, 10(a0)
-; ZBA-NEXT: add a0, a0, a1
-; ZBA-NEXT: andi a0, a0, 255
-; ZBA-NEXT: ret
-entry:
- %e = getelementptr inbounds nuw i8, ptr %p, i64 1800
- %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %x
- %0 = load i8, ptr %arrayidx, align 1
- %add = add iXLen %y, 10
- %arrayidx2 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %add
- %1 = load i8, ptr %arrayidx2, align 1
- %add4 = add i8 %1, %0
- ret i8 %add4
-}
-
-define zeroext i8 @test_minsize(ptr %p, iXLen %x, iXLen %y) minsize {
-; CHECK-LABEL: test_minsize:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi a0, a0, 1800
-; CHECK-NEXT: add a1, a0, a1
-; CHECK-NEXT: add a0, a2, a0
-; CHECK-NEXT: lbu a1, 0(a1)
-; CHECK-NEXT: lbu a0, 10(a0)
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: ret
-;
-; ZBA-LABEL: test_minsize:
-; ZBA: # %bb.0: # %entry
-; ZBA-NEXT: addi a0, a0, 1800
-; ZBA-NEXT: add a1, a0, a1
-; ZBA-NEXT: add a0, a2, a0
-; ZBA-NEXT: lbu a1, 0(a1)
-; ZBA-NEXT: lbu a0, 10(a0)
-; ZBA-NEXT: add a0, a0, a1
-; ZBA-NEXT: andi a0, a0, 255
-; ZBA-NEXT: ret
-entry:
- %e = getelementptr inbounds nuw i8, ptr %p, i64 1800
- %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %x
- %0 = load i8, ptr %arrayidx, align 1
- %add = add iXLen %y, 10
- %arrayidx2 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %add
- %1 = load i8, ptr %arrayidx2, align 1
- %add4 = add i8 %1, %0
- ret i8 %add4
-}
diff --git a/llvm/test/CodeGen/RISCV/split-offsets.ll b/llvm/test/CodeGen/RISCV/split-offsets.ll
index b98aa954c09e7..8f5b044c3b3b8 100644
--- a/llvm/test/CodeGen/RISCV/split-offsets.ll
+++ b/llvm/test/CodeGen/RISCV/split-offsets.ll
@@ -14,13 +14,14 @@ define void @test1(ptr %sp, ptr %t, i32 %n) {
; RV32I-NEXT: lui a2, 20
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: addi a2, a2, -1920
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: add a0, a0, a2
; RV32I-NEXT: li a2, 1
-; RV32I-NEXT: sw a3, -1920(a0)
-; RV32I-NEXT: sw a2, -1916(a0)
-; RV32I-NEXT: sw a2, -1920(a1)
-; RV32I-NEXT: sw a3, -1916(a1)
+; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a2, 4(a0)
+; RV32I-NEXT: sw a2, 0(a1)
+; RV32I-NEXT: sw a3, 4(a1)
; RV32I-NEXT: ret
;
; RV64I-LABEL: test1:
@@ -57,16 +58,17 @@ define void @test2(ptr %sp, ptr %t, i32 %n) {
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lui a4, 20
+; RV32I-NEXT: addi a4, a4, -1920
; RV32I-NEXT: add a1, a1, a4
; RV32I-NEXT: add a0, a0, a4
; RV32I-NEXT: blez a2, .LBB1_2
; RV32I-NEXT: .LBB1_1: # %while_body
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: addi a4, a3, 1
-; RV32I-NEXT: sw a4, -1920(a0)
-; RV32I-NEXT: sw a3, -1916(a0)
-; RV32I-NEXT: sw a4, -1920(a1)
-; RV32I-NEXT: sw a3, -1916(a1)
+; RV32I-NEXT: sw a4, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a4, 0(a1)
+; RV32I-NEXT: sw a3, 4(a1)
; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: blt a4, a2, .LBB1_1
; RV32I-NEXT: .LBB1_2: # %while_end
@@ -124,10 +126,11 @@ define void @test3(ptr %t) {
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lui a1, 20
; RV32I-NEXT: li a2, 2
+; RV32I-NEXT: addi a1, a1, -1920
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 3
-; RV32I-NEXT: sw a2, -1916(a0)
-; RV32I-NEXT: sw a1, -1912(a0)
+; RV32I-NEXT: sw a2, 4(a0)
+; RV32I-NEXT: sw a1, 8(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: test3:
diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
index 578f51a957a75..e761fcb736a87 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
@@ -1136,9 +1136,10 @@ define i64 @lrd_large_offset(ptr %a, i64 %b) {
; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3
; RV32XTHEADMEMIDX-NEXT: add a0, a1, a0
; RV32XTHEADMEMIDX-NEXT: lui a1, 23
+; RV32XTHEADMEMIDX-NEXT: addi a1, a1, 1792
; RV32XTHEADMEMIDX-NEXT: add a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: lw a0, 1792(a1)
-; RV32XTHEADMEMIDX-NEXT: lw a1, 1796(a1)
+; RV32XTHEADMEMIDX-NEXT: lw a0, 0(a1)
+; RV32XTHEADMEMIDX-NEXT: lw a1, 4(a1)
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lrd_large_offset: