[lld] [llvm] [RISCV] Teach RISCVMergeBaseOffset to merge %lo into load/store after folding arithmetic (PR #185353)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 23:22:16 PDT 2026
https://github.com/LiqinWeng created https://github.com/llvm/llvm-project/pull/185353
It's possible we have:
1. First scenario: the add opcode can be add or add.uw
```
lui vr1, %hi(sym)
addi vr1, vr1, %lo(sym)
add vr2, vrx, vr1
lbu vr3, off(vr2)
-----Transform-----
lui vr1, %hi(sym+off)
add vr2, vrx, vr1, %gprel_add(sym+off)
lbu vr3, %gprel_lo(sym+off)(vr2)
```
2. Second scenario: the add opcode can be sh1add/sh2add/sh3add/sh1add.uw/sh2add.uw/sh3add.uw
```
lui vr1, %hi(sym)
addi vr1, vr1, %lo(sym)
sh1add vr2, vrx, vr1
lbu vr3, off(vr2)
-----Transform-----
lui vr1, %hi(sym+off)
sh1add vr2, vrx, vr1, %gprel_shxadd(sym+off)
lbu vr3, %gprel_lo(sym+off)(vr2)
```
3. Third scenario (applies to shxadd as well): an addi offset (offAddi) is also folded in
```
addi vrx, vr0, offAddi
lui vr1, %hi(sym)
addi vr1, vr1, %lo(sym)
add vr2, vrx, vr1
lbu vr3, off(vr2)
-----Transform-----
lui vr1, %hi(sym+off+offAddi)
add vr2, vr0, vr1, %gprel_add(sym+off+offAddi)
lbu vr3, %gprel_lo(sym+off+offAddi)(vr2)
```
From f06ac077e454072acbd81ec32120372ec547a131 Mon Sep 17 00:00:00 2001
From: wengliqin <liqin.weng at spacemit.com>
Date: Mon, 9 Mar 2026 14:10:33 +0800
Subject: [PATCH] [RISCV] Teach RISCVMergeBaseOffset to merge %lo into
load/store after folding arithmetic
---
lld/ELF/Arch/RISCV.cpp | 62 +-
lld/test/ELF/riscv-relax-gprel-add.s | 71 +
lld/test/ELF/riscv-relax-gprel-shxadd.s | 70 +
.../llvm/BinaryFormat/ELFRelocs/RISCV.def | 4 +
.../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 13 +-
.../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 6 +
.../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 4 +
.../MCTargetDesc/RISCVELFObjectWriter.cpp | 8 +
.../RISCV/MCTargetDesc/RISCVFixupKinds.h | 9 +
.../RISCV/MCTargetDesc/RISCVMCAsmInfo.h | 1 +
.../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 93 +-
.../Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp | 9 +
llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 9 +
llvm/lib/Target/RISCV/RISCVInstrFormats.td | 11 +
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 3 +
llvm/lib/Target/RISCV/RISCVInstrInfo.td | 19 +
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 17 +
.../lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 261 +++-
.../RISCV/fold-addi-with-add-into-memops.ll | 1232 +++++++++++++++++
...op-strength-reduce-add-cheaper-than-mul.ll | 10 +-
llvm/test/CodeGen/RISCV/lpad.ll | 20 +-
llvm/test/MC/RISCV/Relocations/relocations.s | 48 +
llvm/test/MC/RISCV/rv32zba-invalid.s | 5 +
llvm/test/MC/RISCV/rv64zba-invalid.s | 9 +
llvm/test/MC/RISCV/rv64zbb-invalid.s | 2 +-
25 files changed, 1971 insertions(+), 25 deletions(-)
create mode 100644 lld/test/ELF/riscv-relax-gprel-add.s
create mode 100644 lld/test/ELF/riscv-relax-gprel-shxadd.s
create mode 100644 llvm/test/CodeGen/RISCV/fold-addi-with-add-into-memops.ll
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index 85f49c9260565..ccaed6b675d7b 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -74,6 +74,10 @@ class RISCV final : public TargetInfo {
#define INTERNAL_R_RISCV_GPREL_S 257
#define INTERNAL_R_RISCV_X0REL_I 258
#define INTERNAL_R_RISCV_X0REL_S 259
+#define INTERNAL_R_RISCV_GPREL_ADD 260
+#define INTERNAL_R_RISCV_GPREL_SHXADD 261
+#define INTERNAL_R_RISCV_GPREL_ADD_I 262
+#define INTERNAL_R_RISCV_GPREL_ADD_S 263
const uint64_t dtpOffset = 0x800;
@@ -289,6 +293,10 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
case R_RISCV_HI20:
case R_RISCV_LO12_I:
case R_RISCV_LO12_S:
+ case R_RISCV_GPREL_ADD:
+ case R_RISCV_GPREL_SHXADD:
+ case R_RISCV_GPREL_LO12_I:
+ case R_RISCV_GPREL_LO12_S:
return R_ABS;
case R_RISCV_ADD8:
case R_RISCV_ADD16:
@@ -499,7 +507,8 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_RISCV_TLSDESC_LOAD_LO12:
case R_RISCV_TLSDESC_ADD_LO12:
case R_RISCV_TPREL_LO12_I:
- case R_RISCV_LO12_I: {
+ case R_RISCV_LO12_I:
+ case R_RISCV_GPREL_LO12_I: {
uint64_t hi = (val + 0x800) >> 12;
uint64_t lo = val - (hi << 12);
write32le(loc, setLO12_I(read32le(loc), lo & 0xfff));
@@ -508,12 +517,40 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_RISCV_PCREL_LO12_S:
case R_RISCV_TPREL_LO12_S:
- case R_RISCV_LO12_S: {
+ case R_RISCV_LO12_S:
+ case R_RISCV_GPREL_LO12_S: {
uint64_t hi = (val + 0x800) >> 12;
uint64_t lo = val - (hi << 12);
write32le(loc, setLO12_S(read32le(loc), lo));
return;
}
+ case R_RISCV_GPREL_ADD:
+ case R_RISCV_GPREL_SHXADD: {
+ write32le(loc, read32le(loc));
+ return;
+ }
+
+ case INTERNAL_R_RISCV_GPREL_ADD:
+ case INTERNAL_R_RISCV_GPREL_SHXADD: {
+ uint32_t insn = (read32le(loc) & ~(31 << 20)) | (X_GP << 20);
+ write32le(loc, insn);
+ return;
+ }
+
+ case INTERNAL_R_RISCV_GPREL_ADD_I:
+ case INTERNAL_R_RISCV_GPREL_ADD_S: {
+ Defined *gp = ctx.sym.riscvGlobalPointer;
+ int64_t displace = SignExtend64(val - gp->getVA(ctx), bits);
+ checkInt(ctx, loc, displace, 12, rel);
+ uint32_t insn = read32le(loc);
+ if (rel.type == INTERNAL_R_RISCV_GPREL_ADD_I)
+ insn = setLO12_I(insn, displace);
+ else
+ insn = setLO12_S(insn, displace);
+
+ write32le(loc, insn);
+ return;
+ }
case INTERNAL_R_RISCV_X0REL_I:
case INTERNAL_R_RISCV_X0REL_S: {
@@ -895,6 +932,19 @@ static void relaxHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
case R_RISCV_LO12_S:
sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_GPREL_S;
break;
+
+ case R_RISCV_GPREL_ADD:
+ sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_GPREL_ADD;
+ break;
+ case R_RISCV_GPREL_SHXADD:
+ sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_GPREL_SHXADD;
+ break;
+ case R_RISCV_GPREL_LO12_I:
+ sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_GPREL_ADD_I;
+ break;
+ case R_RISCV_GPREL_LO12_S:
+ sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_GPREL_ADD_S;
+ break;
}
}
@@ -950,6 +1000,10 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) {
case R_RISCV_HI20:
case R_RISCV_LO12_I:
case R_RISCV_LO12_S:
+ case R_RISCV_GPREL_ADD:
+ case R_RISCV_GPREL_SHXADD:
+ case R_RISCV_GPREL_LO12_I:
+ case R_RISCV_GPREL_LO12_S:
if (relaxable(relocs, i))
relaxHi20Lo12(ctx, sec, i, loc, r, remove);
break;
@@ -1193,6 +1247,10 @@ void RISCV::finalizeRelax(int passes) const {
case INTERNAL_R_RISCV_GPREL_S:
case INTERNAL_R_RISCV_X0REL_I:
case INTERNAL_R_RISCV_X0REL_S:
+ case INTERNAL_R_RISCV_GPREL_ADD:
+ case INTERNAL_R_RISCV_GPREL_SHXADD:
+ case INTERNAL_R_RISCV_GPREL_ADD_I:
+ case INTERNAL_R_RISCV_GPREL_ADD_S:
break;
case R_RISCV_RELAX:
// Used by relaxTlsLe to indicate the relocation is ignored.
diff --git a/lld/test/ELF/riscv-relax-gprel-add.s b/lld/test/ELF/riscv-relax-gprel-add.s
new file mode 100644
index 0000000000000..67c74db3c2f9d
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-gprel-add.s
@@ -0,0 +1,71 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && split-file %s %t && cd %t
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax a.s -o rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax a.s -o rv64.o
+
+# RUN: ld.lld --relax-gp --undefined=__global_pointer$ rv32.o lds -o rv32
+# RUN: ld.lld --relax-gp --undefined=__global_pointer$ rv64.o lds -o rv64
+# RUN: llvm-objdump -td -M no-aliases --no-show-raw-insn rv32 | FileCheck %s
+# RUN: llvm-objdump -td -M no-aliases --no-show-raw-insn rv64 | FileCheck %s
+
+# CHECK: 00000000 l .text {{0*}}0 $x
+
+# CHECK-NOT: lui
+# CHECK: addi a1, a1, -0x800
+# CHECK-NEXT: add a0, a0, gp
+# CHECK-NEXT: lw a0, -0x800(a0)
+# CHECK-NEXT: sw a0, -0x800(a0)
+# CHECK-NOT: lui
+# CHECK-NEXT: addi a1, a1, 0x7fa
+# CHECK-NEXT: add a0, a0, gp
+# CHECK-NEXT: lw a0, 0x7fa(a0)
+# CHECK-NEXT: sw a0, 0x7fa(a0)
+# CHECK-NEXT: lui a1, 0x201
+# CHECK-NEXT: addi a1, a1, 0xe
+# CHECK-NEXT: add a0, a0, a1
+# CHECK-NEXT: lw a0, 0xe(a0)
+# CHECK-NEXT: sw a0, 0xe(a0)
+# CHECK-EMPTY:
+# CHECK-NEXT: <a>:
+# CHECK-NEXT: addi a0, a0, 0x1
+
+#--- a.s
+.global _start
+_start:
+ slli a0, a0, 2
+ lui a1, %hi(array)
+ addi a1, a1, %gprel_lo(array)
+ add a0, a0, a1, %gprel_add(array)
+ lw a0, %gprel_lo(array)(a0)
+ sw a0, %gprel_lo(array)(a0)
+ lui a1, %hi(array1+10)
+ addi a1, a1, %gprel_lo(array1+10)
+ add a0, a0, a1, %gprel_add(array1+10)
+ lw a0, %gprel_lo(array1+10)(a0)
+ sw a0, %gprel_lo(array1+10)(a0)
+ lui a1, %hi(norelax+10)
+ addi a1, a1, %gprel_lo(norelax+10)
+ add a0, a0, a1, %gprel_add(norelax+10)
+ lw a0, %gprel_lo(norelax+10)(a0)
+ sw a0, %gprel_lo(norelax+10)(a0)
+a:
+ addi a0, a0, 1
+
+.section .sdata,"aw"
+array:
+ .zero 4080
+ .size array, 4080
+array1:
+ .zero 20
+ .size array, 20
+norelax:
+ .zero 6
+ .size array, 6
+
+#--- lds
+SECTIONS {
+ .text : {*(.text) }
+ .sdata 0x200000 : { }
+}
+
diff --git a/lld/test/ELF/riscv-relax-gprel-shxadd.s b/lld/test/ELF/riscv-relax-gprel-shxadd.s
new file mode 100644
index 0000000000000..9dab44b6baa26
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-gprel-shxadd.s
@@ -0,0 +1,70 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && split-file %s %t && cd %t
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax,+zba a.s -o rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax,+zba a.s -o rv64.o
+
+# RUN: ld.lld --relax-gp --undefined=__global_pointer$ rv32.o lds -o rv32
+# RUN: ld.lld --relax-gp --undefined=__global_pointer$ rv64.o lds -o rv64
+# RUN: llvm-objdump --mattr=+zba -td -M no-aliases --no-show-raw-insn rv32 | FileCheck %s
+# RUN: llvm-objdump --mattr=+zba -td -M no-aliases --no-show-raw-insn rv64 | FileCheck %s
+
+# CHECK: 00000000 l .text {{0*}}0 $x
+
+# CHECK-NOT: lui
+# CHECK: addi a1, a1, -0x800
+# CHECK-NEXT: sh1add a0, a0, gp
+# CHECK-NEXT: lw a0, -0x800(a0)
+# CHECK-NEXT: sw a0, -0x800(a0)
+# CHECK-NOT: lui
+# CHECK-NEXT: addi a1, a1, 0x7fa
+# CHECK-NEXT: sh1add a0, a0, gp
+# CHECK-NEXT: lw a0, 0x7fa(a0)
+# CHECK-NEXT: sw a0, 0x7fa(a0)
+# CHECK-NEXT: lui a1, 0x201
+# CHECK-NEXT: addi a1, a1, 0xe
+# CHECK-NEXT: sh1add a0, a0, a1
+# CHECK-NEXT: lw a0, 0xe(a0)
+# CHECK-NEXT: sw a0, 0xe(a0)
+# CHECK-EMPTY:
+# CHECK-NEXT: <a>:
+# CHECK-NEXT: addi a0, a0, 0x1
+
+#--- a.s
+.global _start
+_start:
+ lui a1, %hi(array)
+ addi a1, a1, %gprel_lo(array)
+ sh1add a0, a0, a1, %gprel_shxadd(array)
+ lw a0, %gprel_lo(array)(a0)
+ sw a0, %gprel_lo(array)(a0)
+ lui a1, %hi(array1+10)
+ addi a1, a1, %gprel_lo(array1+10)
+ sh1add a0, a0, a1, %gprel_shxadd(array1+10)
+ lw a0, %gprel_lo(array1+10)(a0)
+ sw a0, %gprel_lo(array1+10)(a0)
+ lui a1, %hi(norelax+10)
+ addi a1, a1, %gprel_lo(norelax+10)
+ sh1add a0, a0, a1, %gprel_shxadd(norelax+10)
+ lw a0, %gprel_lo(norelax+10)(a0)
+ sw a0, %gprel_lo(norelax+10)(a0)
+a:
+ addi a0, a0, 1
+
+.section .sdata,"aw"
+array:
+ .zero 4080
+ .size array, 4080
+array1:
+ .zero 20
+ .size array, 20
+norelax:
+ .zero 6
+ .size array, 6
+
+#--- lds
+SECTIONS {
+ .text : {*(.text) }
+ .sdata 0x200000 : { }
+}
+
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
index ac9a089e853a6..a3d8743523ca9 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
@@ -60,6 +60,10 @@ ELF_RELOC(R_RISCV_TLSDESC_HI20, 62)
ELF_RELOC(R_RISCV_TLSDESC_LOAD_LO12, 63)
ELF_RELOC(R_RISCV_TLSDESC_ADD_LO12, 64)
ELF_RELOC(R_RISCV_TLSDESC_CALL, 65)
+ELF_RELOC(R_RISCV_GPREL_LO12_I, 66)
+ELF_RELOC(R_RISCV_GPREL_LO12_S, 67)
+ELF_RELOC(R_RISCV_GPREL_ADD, 68)
+ELF_RELOC(R_RISCV_GPREL_SHXADD, 69)
ELF_RELOC(R_RISCV_VENDOR, 191)
ELF_RELOC(R_RISCV_CUSTOM192, 192)
ELF_RELOC(R_RISCV_CUSTOM193, 193)
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 443b0b5f3c04b..1468ae35cb96e 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -608,6 +608,17 @@ struct RISCVOperand final : public MCParsedAsmOperand {
VK == ELF::R_RISCV_TPREL_ADD;
}
+ bool isGPRelAddSymbol() const {
+ int64_t Imm;
+ // Must be of 'immediate' type but not a constant.
+ if (!isExpr() || evaluateConstantExpr(getExpr(), Imm))
+ return false;
+
+ RISCV::Specifier VK = RISCV::S_None;
+ return RISCVAsmParser::classifySymbolRef(getExpr(), VK) &&
+ (VK == ELF::R_RISCV_GPREL_ADD || VK == ELF::R_RISCV_GPREL_SHXADD);
+ }
+
bool isTLSDESCCallSymbol() const {
int64_t Imm;
// Must be of 'immediate' type but not a constant.
@@ -869,7 +880,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {
RISCV::Specifier VK = RISCV::S_None;
return RISCVAsmParser::classifySymbolRef(getExpr(), VK) &&
- (VK == RISCV::S_LO || VK == RISCV::S_PCREL_LO ||
+ (VK == RISCV::S_LO || VK == RISCV::S_PCREL_LO || VK == RISCV::S_GPREL_LO ||
VK == RISCV::S_TPREL_LO || VK == ELF::R_RISCV_TLSDESC_LOAD_LO12 ||
VK == ELF::R_RISCV_TLSDESC_ADD_LO12);
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 0f63b02b54c74..04852901d57b3 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -77,6 +77,10 @@ MCFixupKindInfo RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_riscv_lo12_i", 20, 12, 0},
{"fixup_riscv_12_i", 20, 12, 0},
{"fixup_riscv_lo12_s", 0, 32, 0},
+ {"fixup_riscv_gprel_lo12_i", 20, 12, 0},
+ {"fixup_riscv_gprel_lo12_s", 0, 32, 0},
+ {"fixup_riscv_gprel_add", 0, 0, 0},
+ {"fixup_riscv_gprel_shxadd", 0, 0, 0},
{"fixup_riscv_pcrel_hi20", 12, 20, 0},
{"fixup_riscv_pcrel_lo12_i", 20, 12, 0},
{"fixup_riscv_pcrel_lo12_s", 0, 32, 0},
@@ -506,6 +510,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case FK_Data_leb128:
return Value;
case RISCV::fixup_riscv_lo12_i:
+ case RISCV::fixup_riscv_gprel_lo12_i:
case RISCV::fixup_riscv_pcrel_lo12_i:
return Value & 0xfff;
case RISCV::fixup_riscv_12_i:
@@ -515,6 +520,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
return Value & 0xfff;
case RISCV::fixup_riscv_lo12_s:
+ case RISCV::fixup_riscv_gprel_lo12_s:
case RISCV::fixup_riscv_pcrel_lo12_s:
return (((Value >> 5) & 0x7f) << 25) | ((Value & 0x1f) << 7);
case RISCV::fixup_riscv_hi20:
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index c4d54bdd60737..01b16cee5c599 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -470,6 +470,10 @@ enum {
MO_TLSDESC_ADD_LO = 15,
MO_TLSDESC_CALL = 16,
+ MO_GPREL_LO = 17,
+ MO_GPREL_ADD = 18,
+ MO_GPREL_SHXADD = 19,
+
// Used to differentiate between target-specific "direct" flags and "bitmask"
// flags. A machine operand can only have one "direct" flag, but can have
// multiple "bitmask" flags.
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 2885e3cca8722..613c8b1c8ac3c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -135,6 +135,14 @@ unsigned RISCVELFObjectWriter::getRelocType(const MCFixup &Fixup,
return ELF::R_RISCV_LO12_I;
case RISCV::fixup_riscv_lo12_s:
return ELF::R_RISCV_LO12_S;
+ case RISCV::fixup_riscv_gprel_lo12_i:
+ return ELF::R_RISCV_GPREL_LO12_I;
+ case RISCV::fixup_riscv_gprel_lo12_s:
+ return ELF::R_RISCV_GPREL_LO12_S;
+ case RISCV::fixup_riscv_gprel_add:
+ return ELF::R_RISCV_GPREL_ADD;
+ case RISCV::fixup_riscv_gprel_shxadd:
+ return ELF::R_RISCV_GPREL_SHXADD;
case RISCV::fixup_riscv_rvc_imm:
reportError(Fixup.getLoc(), "No relocation for CI-type instructions");
return ELF::R_RISCV_NONE;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index a2b75e4a42e76..f56d54d8fb3f4 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -24,6 +24,15 @@ enum Fixups {
fixup_riscv_12_i,
// 12-bit fixup corresponding to %lo(foo) for the S-type store instructions
fixup_riscv_lo12_s,
+ // 12-bit fixup corresponding to %gprel_lo(foo) for instructions like addi
+ fixup_riscv_gprel_lo12_i,
+ // 12-bit fixup corresponding to %gprel_lo(foo) for the S-type store
+ // instructions
+ fixup_riscv_gprel_lo12_s,
+ // Fixup for %gprel_add(foo) on PseudoAdd[UW]GPRel; used as a linker hint.
+ fixup_riscv_gprel_add,
+ // Fixup for %gprel_shxadd(foo) on PseudoShXAdd[UW]GPRel; a linker hint.
+ fixup_riscv_gprel_shxadd,
// 20-bit fixup corresponding to %pcrel_hi(foo) for instructions like auipc
fixup_riscv_pcrel_hi20,
// 12-bit fixup corresponding to %pcrel_lo(foo) for instructions like addi
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
index d030c3b5cf867..69823c5adc1a2 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
@@ -45,6 +45,7 @@ enum {
S_TPREL_LO,
S_CALL_PLT,
S_GOT_HI,
+ S_GPREL_LO,
// Vendor-specific relocation types might conflict across vendors.
// Refer to them using Specifier constants.
S_QC_ABS20,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 4304a1e651ca5..4cead8bf751cf 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -64,6 +64,9 @@ class RISCVMCCodeEmitter : public MCCodeEmitter {
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ void expandAddGPRel(const MCInst &MI, SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
void expandLongCondBr(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -260,6 +263,68 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI,
support::endian::write(CB, Binary, llvm::endianness::little);
}
+static std::pair<unsigned, uint16_t> getAddOpAndFixups(unsigned AddOp) {
+ switch (AddOp) {
+ default:
+ llvm_unreachable("Unexpected ADD or SHXADD Opcode on GP-relative!");
+ case RISCV::PseudoAddGPRel:
+ return std::make_pair(RISCV::ADD, ELF::R_RISCV_GPREL_ADD);
+ case RISCV::PseudoAddUWGPRel:
+ return std::make_pair(RISCV::ADD_UW, ELF::R_RISCV_GPREL_ADD);
+ case RISCV::PseudoSh1AddGPRel:
+ return std::make_pair(RISCV::SH1ADD, ELF::R_RISCV_GPREL_SHXADD);
+ case RISCV::PseudoSh2AddGPRel:
+ return std::make_pair(RISCV::SH2ADD, ELF::R_RISCV_GPREL_SHXADD);
+ case RISCV::PseudoSh3AddGPRel:
+ return std::make_pair(RISCV::SH3ADD, ELF::R_RISCV_GPREL_SHXADD);
+ case RISCV::PseudoSh1AddUWGPRel:
+ return std::make_pair(RISCV::SH1ADD_UW, ELF::R_RISCV_GPREL_SHXADD);
+ case RISCV::PseudoSh2AddUWGPRel:
+ return std::make_pair(RISCV::SH2ADD_UW, ELF::R_RISCV_GPREL_SHXADD);
+ case RISCV::PseudoSh3AddUWGPRel:
+ return std::make_pair(RISCV::SH3ADD_UW, ELF::R_RISCV_GPREL_SHXADD);
+ }
+}
+
+// Expand a GP-relative add/shxadd pseudo to ADD/SHXADD with its relocation.
+void RISCVMCCodeEmitter::expandAddGPRel(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ MCOperand DestReg = MI.getOperand(0);
+ // If the global array can be accessed by GP, src1 or src2 needs to be
+ // replaced with X3 reg in link time.
+ MCOperand Src1 = MI.getOperand(1);
+ MCOperand Src2 = MI.getOperand(2);
+
+ MCOperand SrcSymbol = MI.getOperand(3);
+ assert(SrcSymbol.isExpr() &&
+ "Expected expression as third input to GP-relative add");
+
+ const auto *Expr = dyn_cast<MCSpecifierExpr>(SrcSymbol.getExpr());
+ assert(Expr &&
+ (Expr->getSpecifier() == ELF::R_RISCV_GPREL_ADD ||
+ Expr->getSpecifier() == ELF::R_RISCV_GPREL_SHXADD) &&
+ "Expected gprel_add or gprel_shxadd relocation on GP-relative symbol");
+
+ std::pair<unsigned, uint16_t> Res = getAddOpAndFixups(MI.getOpcode());
+
+ // Emit the correct gprel_add or gprel_shxadd relocation for the symbol.
+ addFixup(Fixups, 0, Expr, Res.second);
+
+ // Mark the fixup linker-relaxable when the relax feature is enabled.
+ if (STI.hasFeature(RISCV::FeatureRelax)) {
+ Fixups.back().setLinkerRelaxable();
+ }
+
+ // Emit a normal ADD or SHXADD instruction with the given operands.
+ MCInst TmpInst =
+ MCInstBuilder(Res.first).addOperand(DestReg).addOperand(Src1).addOperand(
+ Src2);
+ uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ support::endian::write(CB, Binary, llvm::endianness::little);
+}
+
static unsigned getInvertedBranchOp(unsigned BrOp) {
switch (BrOp) {
default:
@@ -440,6 +505,17 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI,
expandAddTPRel(MI, CB, Fixups, STI);
MCNumEmitted += 1;
return;
+ case RISCV::PseudoAddGPRel:
+ case RISCV::PseudoAddUWGPRel:
+ case RISCV::PseudoSh1AddGPRel:
+ case RISCV::PseudoSh2AddGPRel:
+ case RISCV::PseudoSh3AddGPRel:
+ case RISCV::PseudoSh1AddUWGPRel:
+ case RISCV::PseudoSh2AddUWGPRel:
+ case RISCV::PseudoSh3AddUWGPRel:
+ expandAddGPRel(MI, CB, Fixups, STI);
+ MCNumEmitted += 1;
+ return;
case RISCV::PseudoLongBEQ:
case RISCV::PseudoLongBNE:
case RISCV::PseudoLongBLT:
@@ -636,12 +712,16 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
"invalid specifier");
break;
case ELF::R_RISCV_TPREL_ADD:
+ case ELF::R_RISCV_GPREL_ADD:
+ case ELF::R_RISCV_GPREL_SHXADD:
// tprel_add is only used to indicate that a relocation should be emitted
// for an add instruction used in TP-relative addressing. It should not be
// expanded as if representing an actual instruction operand and so to
// encounter it here is an error.
- llvm_unreachable(
- "ELF::R_RISCV_TPREL_ADD should not represent an instruction operand");
+
+ llvm_unreachable("ELF::R_RISCV_TPREL_ADD or ELF::R_RISCV_GPREL_ADD or "
+ "ELF::R_RISCV_GPREL_SHXADD should not represent an "
+ "instruction operand");
case RISCV::S_LO:
if (MIFrm == RISCVII::InstFormatI)
FixupKind = RISCV::fixup_riscv_lo12_i;
@@ -651,6 +731,15 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
llvm_unreachable("VK_LO used with unexpected instruction format");
RelaxCandidate = true;
break;
+ case RISCV::S_GPREL_LO:
+ if (MIFrm == RISCVII::InstFormatI)
+ FixupKind = RISCV::fixup_riscv_gprel_lo12_i;
+ else if (MIFrm == RISCVII::InstFormatS)
+ FixupKind = RISCV::fixup_riscv_gprel_lo12_s;
+ else
+ llvm_unreachable("S_GPREL_LO used with unexpected instruction format");
+ RelaxCandidate = true;
+ break;
case ELF::R_RISCV_HI20:
FixupKind = RISCV::fixup_riscv_hi20;
RelaxCandidate = true;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index e6366af9163e6..f1a99669c72de 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -24,6 +24,9 @@ RISCV::Specifier RISCV::parseSpecifierName(StringRef name) {
return StringSwitch<RISCV::Specifier>(name)
.Case("lo", RISCV::S_LO)
.Case("hi", ELF::R_RISCV_HI20)
+ .Case("gprel_lo", RISCV::S_GPREL_LO)
+ .Case("gprel_add", ELF::R_RISCV_GPREL_ADD)
+ .Case("gprel_shxadd", ELF::R_RISCV_GPREL_SHXADD)
.Case("pcrel_lo", RISCV::S_PCREL_LO)
.Case("pcrel_hi", RISCV::S_PCREL_HI)
.Case("got_pcrel_hi", RISCV::S_GOT_HI)
@@ -51,6 +54,12 @@ StringRef RISCV::getSpecifierName(Specifier S) {
return "lo";
case ELF::R_RISCV_HI20:
return "hi";
+ case RISCV::S_GPREL_LO:
+ return "gprel_lo";
+ case ELF::R_RISCV_GPREL_ADD:
+ return "gprel_add";
+ case ELF::R_RISCV_GPREL_SHXADD:
+ return "gprel_shxadd";
case RISCV::S_PCREL_LO:
return "pcrel_lo";
case RISCV::S_PCREL_HI:
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index eb15227a72a83..3c1329677fda2 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -984,6 +984,15 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
case RISCVII::MO_HI:
Kind = ELF::R_RISCV_HI20;
break;
+ case RISCVII::MO_GPREL_LO:
+ Kind = RISCV::S_GPREL_LO;
+ break;
+ case RISCVII::MO_GPREL_ADD:
+ Kind = ELF::R_RISCV_GPREL_ADD;
+ break;
+ case RISCVII::MO_GPREL_SHXADD:
+ Kind = ELF::R_RISCV_GPREL_SHXADD;
+ break;
case RISCVII::MO_PCREL_LO:
Kind = RISCV::S_PCREL_LO;
break;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index b49639f3b9e1e..fb06c25235c51 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -356,6 +356,17 @@ class PseudoStore<string opcodestr, DAGOperand rsty = GPR>
let isAsmParserOnly = 1;
}
+// Pseudo add or shxadd instructions.
+class PseudoGPRel<string opcodestr>
+ : Pseudo<(outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, gprel_add_symbol:$src), [], opcodestr,
+ "$rd, $rs1, $rs2, $src"> {
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+ let isCodeGenOnly = 0;
+}
+
// Instruction formats are listed in the order they appear in the RISC-V
// instruction set manual (R, R4, I, S, B, U, J).
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index e6fbd5c182ee8..b81cd8df5090b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3655,6 +3655,9 @@ RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_CALL, "riscv-call"},
{MO_LO, "riscv-lo"},
{MO_HI, "riscv-hi"},
+ {MO_GPREL_LO, "riscv-gprel-lo"},
+ {MO_GPREL_ADD, "riscv-gprel-add"},
+ {MO_GPREL_SHXADD, "riscv-gprel-shxadd"},
{MO_PCREL_LO, "riscv-pcrel-lo"},
{MO_PCREL_HI, "riscv-pcrel-hi"},
{MO_GOT_HI, "riscv-got-hi"},
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index eb276ef17da75..2352c26c5c750 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -476,6 +476,19 @@ def tprel_add_symbol : Operand<XLenVT> {
let ParserMatchClass = TPRelAddSymbol;
}
+def GPRelAddSymbol : AsmOperandClass {
+ let Name = "GPRelAddSymbol";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidGPRelAddSymbol";
+ let DiagnosticString = "operand must be a symbol with %gprel_add or %gprel_shxadd specifier";
+ let ParserMethod = "parseOperandWithSpecifier";
+}
+
+// A symbol with the %gprel_add variant.
+def gprel_add_symbol : Operand<XLenVT> {
+ let ParserMatchClass = GPRelAddSymbol;
+}
+
def CSRSystemRegister : AsmOperandClass {
let Name = "CSRSystemRegister";
let ParserMethod = "parseCSRSystemRegister";
@@ -1536,6 +1549,12 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
(ins GPR:$rs1, GPR:$rs2, tprel_add_symbol:$src), [],
"add", "$rd, $rs1, $rs2, $src">;
+// This is a special case of the ADD instruction used to facilitate the use of a
+// fourth operand to emit a relocation on a symbol relating to this instruction.
+// The relocation does not affect any bits of the instruction itself but is used
+// as a hint to the linker.
+def PseudoAddGPRel : PseudoGPRel<"add">;
+
/// FrameIndex calculations
// Transforms frameindex -> tframeindex.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 2faaf887c3037..64f79986917dd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -258,6 +258,23 @@ class RVBShift_ri<bits<5> imm11_7, bits<3> funct3, RISCVOpcode opcode,
(ins GPR:$rs1, uimmlog2xlen:$shamt), opcodestr,
"$rd, $rs1, $shamt">;
+// This is a special case of the SH1ADD/SH2ADD/SH3ADD instruction used to
+// facilitate the use of a fourth operand to emit a relocation on a symbol
+// relating to this instruction. The relocation does not affect any bits of the
+// instruction itself but is used as a hint to the linker.
+let Predicates = [HasStdExtZba] in {
+ def PseudoSh1AddGPRel : PseudoGPRel<"sh1add">;
+ def PseudoSh2AddGPRel : PseudoGPRel<"sh2add">;
+ def PseudoSh3AddGPRel : PseudoGPRel<"sh3add">;
+}
+
+let Predicates = [HasStdExtZba, IsRV64] in {
+ def PseudoAddUWGPRel : PseudoGPRel<"add.uw">;
+ def PseudoSh1AddUWGPRel : PseudoGPRel<"sh1add.uw">;
+ def PseudoSh2AddUWGPRel : PseudoGPRel<"sh2add.uw">;
+ def PseudoSh3AddUWGPRel : PseudoGPRel<"sh3add.uw">;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index d7ad4c14468ee..46260fbd5b7b7 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -42,6 +42,7 @@ class RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo,
MachineInstr &TailShXAdd, Register GSReg);
+ bool foldGPIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);
bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);
bool foldShxaddIntoScaledMemory(MachineInstr &Hi, MachineInstr &Lo);
@@ -692,6 +693,260 @@ bool RISCVMergeBaseOffsetOpt::foldShxaddIntoScaledMemory(MachineInstr &Hi,
return true;
}
+
+static bool isShxadd(MachineInstr &MI) {
+ if (MI.getOpcode() == RISCV::SH1ADD || MI.getOpcode() == RISCV::SH2ADD ||
+ MI.getOpcode() == RISCV::SH3ADD || MI.getOpcode() == RISCV::SH1ADD_UW ||
+ MI.getOpcode() == RISCV::SH2ADD_UW || MI.getOpcode() == RISCV::SH3ADD_UW)
+ return true;
+ return false;
+}
+
+// Returns the shift amount from a SHXADD instruction. Returns 0 if the
+// instruction is not a SHXADD.
+static unsigned getSHXADDShiftAmount(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return 0;
+ case RISCV::SH1ADD:
+ case RISCV::SH1ADD_UW:
+ return 1;
+ case RISCV::SH2ADD:
+ case RISCV::SH2ADD_UW:
+ return 2;
+ case RISCV::SH3ADD:
+ case RISCV::SH3ADD_UW:
+ return 3;
+ }
+}
+
+static std::pair<unsigned, unsigned>
+getTargetFlagsAndPattern(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case RISCV::ADD:
+ return std::make_pair(RISCVII::MO_GPREL_ADD, RISCV::PseudoAddGPRel);
+ case RISCV::ADD_UW:
+ return std::make_pair(RISCVII::MO_GPREL_ADD, RISCV::PseudoAddUWGPRel);
+ case RISCV::SH1ADD:
+ return std::make_pair(RISCVII::MO_GPREL_SHXADD, RISCV::PseudoSh1AddGPRel);
+ case RISCV::SH2ADD:
+ return std::make_pair(RISCVII::MO_GPREL_SHXADD, RISCV::PseudoSh2AddGPRel);
+ case RISCV::SH3ADD:
+ return std::make_pair(RISCVII::MO_GPREL_SHXADD, RISCV::PseudoSh3AddGPRel);
+ case RISCV::SH1ADD_UW:
+ return std::make_pair(RISCVII::MO_GPREL_SHXADD, RISCV::PseudoSh1AddUWGPRel);
+ case RISCV::SH2ADD_UW:
+ return std::make_pair(RISCVII::MO_GPREL_SHXADD, RISCV::PseudoSh2AddUWGPRel);
+ case RISCV::SH3ADD_UW:
+ return std::make_pair(RISCVII::MO_GPREL_SHXADD, RISCV::PseudoSh3AddUWGPRel);
+ default:
+ llvm_unreachable("Unexpected ADD or SHXADD Opcode");
+ }
+}
+
+// Use add or shxadd instruction to store the offset in a register.
+// Base address lowering is of the form:
+// Hi: lui vr1, %hi(s)
+// Lo: addi vr2, vreg1, %lo(s)
+// / \
+// / \
+// / \
+// / \
+// vrx(voff): add vr3, vr2, vrx Shxadd vr3, vrx, vr2
+// \ /
+// \ /
+// \ /
+// \ /
+// MemOps vr4, voff(vr3)
+//
+// If the add/shxadd of uses are MemOps with the same offset, we can transform:
+// ------------------- Format after Transform --------------------
+//
+// Hi: lui vr1, %hi(s+voff)
+// / \
+// / \
+// / \
+// / \
+// Add %gprel_add/%gprel_shxadd/%gprel_lo relocations, used as linker hints
+// add vr3,vr2,vrx,%gprel_add(s+voff) shxadd vr3,vrx,vr2,%gprel_shxadd(s+voff)
+// \ /
+// \ /
+// \ /
+// \ /
+// MemOps vr4, %gprel_lo(s+voff)(vr3)
+//
+// If the global variable is placed in the gp-addressable range, we can complete
+// the operation with fewer instructions.
+//
+bool RISCVMergeBaseOffsetOpt::foldGPIntoMemoryOps(MachineInstr &Hi,
+ MachineInstr &Lo) {
+ Register LoDstReg = Lo.getOperand(0).getReg();
+
+  // Can't fold unless the register is virtual and has exactly one use.
+ if (!LoDstReg.isVirtual() || !MRI->hasOneUse(LoDstReg))
+ return false;
+
+ MachineInstr &AddMI = *MRI->use_instr_begin(LoDstReg);
+ if (isShxadd(AddMI) && LoDstReg != AddMI.getOperand(2).getReg())
+ return false;
+
+  // Get the base address register used by the memory operations.
+ Register AddDstReg;
+ if (AddMI.getOpcode() == RISCV::ADD || AddMI.getOpcode() == RISCV::ADD_UW || isShxadd(AddMI))
+ AddDstReg = AddMI.getOperand(0).getReg();
+
+ if (!AddDstReg)
+ return false;
+
+ std::optional<int64_t> CommonOffset;
+ for (const MachineInstr &UseMI : MRI->use_instructions(AddDstReg)) {
+ switch (UseMI.getOpcode()) {
+ default:
+ LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
+ return false;
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD: {
+ if (UseMI.getOperand(1).isFI())
+ return false;
+      // The register defined by the add/shxadd should not be the value register.
+ if (AddDstReg == UseMI.getOperand(0).getReg())
+ return false;
+ assert(AddDstReg == UseMI.getOperand(1).getReg() &&
+ "Expected base address use");
+ // All load/store instructions must use the same offset.
+ int64_t Offset = UseMI.getOperand(2).getImm();
+ if (CommonOffset && Offset != CommonOffset)
+ return false;
+ CommonOffset = Offset;
+ break;
+ }
+ }
+ }
+
+ // We found a common offset.
+ // Update the offsets in global address lowering.
+ // We may have already folded some arithmetic so we need to add to any
+ // existing offset.
+ int64_t NewOffset = Hi.getOperand(1).getOffset() + *CommonOffset;
+ // RV32 ignores the upper 32 bits.
+ if (!ST->is64Bit())
+ NewOffset = SignExtend64<32>(NewOffset);
+ // We can only fold simm32 offsets.
+ if (!isInt<32>(NewOffset))
+ return false;
+
+  // Remove the addi feeding the add or shxadd, as:
+ // addi vrx, vr0, offAddi
+ // lui vr1, %hi(s)
+ // addi vr1, vr1, %lo(s)
+ // add vr2, vrxx, vr1
+ // memops vr3, off(vr2)
+ // ----Transform----
+ // lui vr1, %hi(s+off+offAddi)
+ // add vr2, vr0, vr1, %gprel_add(s+off+offAddi)
+ // memops vr3, %gprel_lo(s+off+offAddi)(vr2)
+ int64_t OffAddi = 0;
+ bool AddiToRemove = false;
+ if (AddMI.getOpcode() == RISCV::ADD || AddMI.getOpcode() == RISCV::ADD_UW || isShxadd(AddMI)) {
+ Register Rs = AddMI.getOperand(1).getReg();
+ Register Rt = AddMI.getOperand(2).getReg();
+ Register Reg = Rs == LoDstReg ? Rt : Rs;
+ MachineInstr &AddiOfAdd = *MRI->getVRegDef(Reg);
+ if (MRI->hasOneUse(Reg) && Reg.isVirtual() &&
+ (AddiOfAdd.getOpcode() == RISCV::ADDI ||
+ AddiOfAdd.getOpcode() == RISCV::ADDIW) &&
+ AddiOfAdd.getOperand(2).isImm() && AddiOfAdd.getOperand(1).isReg()) {
+ OffAddi = AddiOfAdd.getOperand(2).getImm();
+ if (!ST->is64Bit())
+ OffAddi = SignExtend64<32>(OffAddi);
+      // An ADDI/ADDIW immediate is always a simm12.
+ assert(isInt<12>(OffAddi) && "Unexpected offset");
+ unsigned ShiftAmt = getSHXADDShiftAmount(AddMI.getOpcode());
+ OffAddi <<= ShiftAmt;
+      // We can only remove the addi if NewOffset + OffAddi still fits in simm32.
+ if (isInt<32>(NewOffset + OffAddi)) {
+ NewOffset += OffAddi;
+ AddiToRemove = true;
+ }
+ }
+
+ // Update the Offsets of the symbol of the %hi
+ Hi.getOperand(1).setOffset(NewOffset);
+ // Expand PseudoMovAddr into LUI
+ if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
+ auto *TII = ST->getInstrInfo();
+ Hi.setDesc(TII->get(RISCV::LUI));
+ Hi.removeOperand(2);
+ }
+    // Update the offset of the symbol in the %lo, which will be lowered into
+    // the MemOps.
+ MachineOperand &ImmOp = Lo.getOperand(1);
+ ImmOp.setOffset(NewOffset);
+
+ if (AddiToRemove) {
+ LLVM_DEBUG(dbgs() << "To remove the Inst is: " << AddiOfAdd);
+ MRI->replaceRegWith(AddiOfAdd.getOperand(0).getReg(),
+ AddiOfAdd.getOperand(1).getReg());
+ AddiOfAdd.eraseFromParent();
+ }
+
+    // Add a fourth operand to the ADD/SHXADD instruction so that a relocation
+    // on the symbol can be emitted for this instruction.
+ for (MachineInstr &UseMI :
+ llvm::make_early_inc_range(MRI->use_instructions(LoDstReg))) {
+ std::pair<unsigned, unsigned> Res = getTargetFlagsAndPattern(UseMI);
+      // Match GCC's operand order here so the assembly output is accepted by
+      // both GNU ld and LLD.
+ Register Rt = UseMI.getOperand(1).getReg();
+ if (Rt == LoDstReg && !isShxadd(UseMI)) {
+ MachineOperand UseOp1 = UseMI.getOperand(1);
+ MachineOperand UseOp2 = UseMI.getOperand(2);
+ UseMI.removeOperand(2);
+ UseMI.removeOperand(1);
+ UseMI.addOperand(UseOp2);
+ UseMI.addOperand(UseOp1);
+ }
+ UseMI.addOperand(ImmOp);
+ MachineOperand &MO = UseMI.getOperand(3);
+ MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(), Res.first);
+ auto *TII = ST->getInstrInfo();
+ UseMI.setDesc(TII->get(Res.second));
+ }
+
+ // Update the immediate in the load/store instructions to add the
+ // offset.
+ for (MachineInstr &UseMI :
+ llvm::make_early_inc_range(MRI->use_instructions(AddDstReg))) {
+ MachineOperand &MO = UseMI.getOperand(2);
+ MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(), RISCVII::MO_GPREL_LO);
+ }
+ }
+
+  // Prevent Lo (originally PseudoMovAddr, which Hi also points to) from
+  // being erased.
+ if (&Lo == &Hi)
+ return true;
+
+ MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
+ Lo.eraseFromParent();
+ return true;
+}
+
bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
@@ -706,9 +961,13 @@ bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
MachineInstr *Lo = nullptr;
if (!detectFoldable(Hi, Lo))
continue;
- MadeChange |= detectAndFoldOffset(Hi, *Lo);
+ MadeChange |= detectAndFoldOffset(Hi, *Lo);
MadeChange |= foldIntoMemoryOps(Hi, *Lo);
MadeChange |= foldShxaddIntoScaledMemory(Hi, *Lo);
+    // Handle non-constant addressing of global array subscripts, which
+    // increases the optimization opportunities for gp relaxation.
+ if (Hi.getOpcode() != RISCV::AUIPC && Hi.getOperand(1).isGlobal() && Lo)
+ MadeChange |= foldGPIntoMemoryOps(Hi, *Lo);
}
}
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-with-add-into-memops.ll b/llvm/test/CodeGen/RISCV/fold-addi-with-add-into-memops.ll
new file mode 100644
index 0000000000000..fe0c2177f38e6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fold-addi-with-add-into-memops.ll
@@ -0,0 +1,1232 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -riscv-enable-global-merge=false -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -riscv-enable-global-merge=false -mattr=+zba -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32ZBA %s
+; RUN: llc -mtriple=riscv64 -riscv-enable-global-merge=false -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+zba -riscv-enable-global-merge=false -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64ZBA %s
+
+ at g1 = dso_local local_unnamed_addr global [100 x i8] zeroinitializer, align 1
+ at g2 = dso_local local_unnamed_addr global [100 x i16] zeroinitializer, align 2
+ at g3 = dso_local local_unnamed_addr global [100 x i32] zeroinitializer, align 4
+ at g4 = dso_local local_unnamed_addr global [100 x i64] zeroinitializer, align 8
+
+define i8 @char_load(i32 %index) {
+; RV32I-LABEL: char_load:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a1, %hi(g1)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV32I-NEXT: lbu a0, %gprel_lo(g1)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: char_load:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g1)
+; RV32ZBA-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV32ZBA-NEXT: lbu a0, %gprel_lo(g1)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: char_load:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a1, %hi(g1)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV64I-NEXT: lbu a0, %gprel_lo(g1)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: char_load:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g1)
+; RV64ZBA-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV64ZBA-NEXT: lbu a0, %gprel_lo(g1)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i8], ptr @g1, i64 0, i64 %idxprom
+ %0 = load i8, ptr %arrayidx, align 1
+ ret i8 %0
+}
+
+define i8 @char_load_offset(i32 %index) {
+; RV32I-LABEL: char_load_offset:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a1, %hi(g1+4)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g1+4)
+; RV32I-NEXT: lbu a0, %gprel_lo(g1+4)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: char_load_offset:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g1+4)
+; RV32ZBA-NEXT: add a0, a0, a1, %gprel_add(g1+4)
+; RV32ZBA-NEXT: lbu a0, %gprel_lo(g1+4)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: char_load_offset:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a1, %hi(g1+4)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g1+4)
+; RV64I-NEXT: lbu a0, %gprel_lo(g1+4)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: char_load_offset:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g1+4)
+; RV64ZBA-NEXT: add a0, a0, a1, %gprel_add(g1+4)
+; RV64ZBA-NEXT: lbu a0, %gprel_lo(g1+4)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %add = add nsw i32 %index, 4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [100 x i8], ptr @g1, i64 0, i64 %idxprom
+ %0 = load i8, ptr %arrayidx, align 1
+ ret i8 %0
+}
+
+define i8 @char_load_uw(i32 %index) {
+; RV32I-LABEL: char_load_uw:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a1, %hi(g1)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV32I-NEXT: lbu a0, %gprel_lo(g1)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: char_load_uw:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g1)
+; RV32ZBA-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV32ZBA-NEXT: lbu a0, %gprel_lo(g1)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: char_load_uw:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: lui a1, %hi(g1)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV64I-NEXT: lbu a0, %gprel_lo(g1)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: char_load_uw:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g1)
+; RV64ZBA-NEXT: add.uw a0, a0, a1, %gprel_add(g1)
+; RV64ZBA-NEXT: lbu a0, %gprel_lo(g1)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = zext i32 %index to i64
+ %arrayidx = getelementptr inbounds nuw [100 x i8], ptr @g1, i64 0, i64 %idxprom
+ %0 = load i8, ptr %arrayidx, align 1
+ ret i8 %0
+}
+
+define i16 @short_load(i32 %index) {
+; RV32I-LABEL: short_load:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: lui a1, %hi(g2)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV32I-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: short_load:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g2)
+; RV32ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2)
+; RV32ZBA-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: short_load:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: lui a1, %hi(g2)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV64I-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: short_load:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g2)
+; RV64ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2)
+; RV64ZBA-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i16], ptr @g2, i64 0, i64 %idxprom
+ %0 = load i16, ptr %arrayidx, align 2
+ ret i16 %0
+}
+
+define i16 @short_load_offset(i32 %index) {
+; RV32I-LABEL: short_load_offset:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: lui a1, %hi(g2+8)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g2+8)
+; RV32I-NEXT: lh a0, %gprel_lo(g2+8)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: short_load_offset:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g2+8)
+; RV32ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2+8)
+; RV32ZBA-NEXT: lh a0, %gprel_lo(g2+8)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: short_load_offset:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: lui a1, %hi(g2+8)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g2+8)
+; RV64I-NEXT: lh a0, %gprel_lo(g2+8)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: short_load_offset:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g2+8)
+; RV64ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2+8)
+; RV64ZBA-NEXT: lh a0, %gprel_lo(g2+8)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %add = add nsw i32 %index, 4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [100 x i16], ptr @g2, i64 0, i64 %idxprom
+ %0 = load i16, ptr %arrayidx, align 2
+ ret i16 %0
+}
+
+define i16 @short_load_uw(i32 %index) {
+; RV32I-LABEL: short_load_uw:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: lui a1, %hi(g2)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV32I-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: short_load_uw:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g2)
+; RV32ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2)
+; RV32ZBA-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: short_load_uw:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 31
+; RV64I-NEXT: lui a1, %hi(g2)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV64I-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: short_load_uw:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g2)
+; RV64ZBA-NEXT: sh1add.uw a0, a0, a1, %gprel_shxadd(g2)
+; RV64ZBA-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = zext i32 %index to i64
+ %arrayidx = getelementptr inbounds nuw [100 x i16], ptr @g2, i64 0, i64 %idxprom
+ %0 = load i16, ptr %arrayidx, align 2
+ ret i16 %0
+}
+
+define i32 @int_load(i32 %index) {
+; RV32I-LABEL: int_load:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: lui a1, %hi(g3)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g3)
+; RV32I-NEXT: lw a0, %gprel_lo(g3)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: int_load:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g3)
+; RV32ZBA-NEXT: sh2add a0, a0, a1, %gprel_shxadd(g3)
+; RV32ZBA-NEXT: lw a0, %gprel_lo(g3)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: int_load:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: lui a1, %hi(g3)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g3)
+; RV64I-NEXT: lw a0, %gprel_lo(g3)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: int_load:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g3)
+; RV64ZBA-NEXT: sh2add a0, a0, a1, %gprel_shxadd(g3)
+; RV64ZBA-NEXT: lw a0, %gprel_lo(g3)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i32], ptr @g3, i64 0, i64 %idxprom
+ %0 = load i32, ptr %arrayidx, align 4
+ ret i32 %0
+}
+
+define i32 @int_load_offset(i32 %index) {
+; RV32I-LABEL: int_load_offset:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: lui a1, %hi(g3+16)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g3+16)
+; RV32I-NEXT: lw a0, %gprel_lo(g3+16)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: int_load_offset:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g3+16)
+; RV32ZBA-NEXT: sh2add a0, a0, a1, %gprel_shxadd(g3+16)
+; RV32ZBA-NEXT: lw a0, %gprel_lo(g3+16)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: int_load_offset:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: lui a1, %hi(g3+16)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g3+16)
+; RV64I-NEXT: lw a0, %gprel_lo(g3+16)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: int_load_offset:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g3+16)
+; RV64ZBA-NEXT: sh2add a0, a0, a1, %gprel_shxadd(g3+16)
+; RV64ZBA-NEXT: lw a0, %gprel_lo(g3+16)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %add = add nsw i32 %index, 4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [100 x i32], ptr @g3, i64 0, i64 %idxprom
+ %0 = load i32, ptr %arrayidx, align 4
+ ret i32 %0
+}
+
+define i32 @int_load_uw(i32 %index) {
+; RV32I-LABEL: int_load_uw:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: lui a1, %hi(g3)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g3)
+; RV32I-NEXT: lw a0, %gprel_lo(g3)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: int_load_uw:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g3)
+; RV32ZBA-NEXT: sh2add a0, a0, a1, %gprel_shxadd(g3)
+; RV32ZBA-NEXT: lw a0, %gprel_lo(g3)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: int_load_uw:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 30
+; RV64I-NEXT: lui a1, %hi(g3)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g3)
+; RV64I-NEXT: lw a0, %gprel_lo(g3)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: int_load_uw:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g3)
+; RV64ZBA-NEXT: sh2add.uw a0, a0, a1, %gprel_shxadd(g3)
+; RV64ZBA-NEXT: lw a0, %gprel_lo(g3)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = zext i32 %index to i64
+ %arrayidx = getelementptr inbounds nuw [100 x i32], ptr @g3, i64 0, i64 %idxprom
+ %0 = load i32, ptr %arrayidx, align 4
+ ret i32 %0
+}
+
+define i64 @long_long_load(i32 %index) {
+; RV32I-LABEL: long_long_load:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: lui a1, %hi(g4)
+; RV32I-NEXT: addi a1, a1, %lo(g4)
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: lw a0, 0(a1)
+; RV32I-NEXT: lw a1, 4(a1)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: long_long_load:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g4)
+; RV32ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV32ZBA-NEXT: sh3add a1, a0, a1
+; RV32ZBA-NEXT: lw a0, 0(a1)
+; RV32ZBA-NEXT: lw a1, 4(a1)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: long_long_load:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: lui a1, %hi(g4)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g4)
+; RV64I-NEXT: ld a0, %gprel_lo(g4)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: long_long_load:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g4)
+; RV64ZBA-NEXT: sh3add a0, a0, a1, %gprel_shxadd(g4)
+; RV64ZBA-NEXT: ld a0, %gprel_lo(g4)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i64], ptr @g4, i64 0, i64 %idxprom
+ %0 = load i64, ptr %arrayidx, align 8
+ ret i64 %0
+}
+
+define i64 @long_long_load_offset(i32 %index) {
+; RV32I-LABEL: long_long_load_offset:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: lui a1, %hi(g4)
+; RV32I-NEXT: addi a1, a1, %lo(g4)
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: lw a0, 32(a1)
+; RV32I-NEXT: lw a1, 36(a1)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: long_long_load_offset:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g4)
+; RV32ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV32ZBA-NEXT: sh3add a1, a0, a1
+; RV32ZBA-NEXT: lw a0, 32(a1)
+; RV32ZBA-NEXT: lw a1, 36(a1)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: long_long_load_offset:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: lui a1, %hi(g4+32)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g4+32)
+; RV64I-NEXT: ld a0, %gprel_lo(g4+32)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: long_long_load_offset:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g4+32)
+; RV64ZBA-NEXT: sh3add a0, a0, a1, %gprel_shxadd(g4+32)
+; RV64ZBA-NEXT: ld a0, %gprel_lo(g4+32)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %add = add nsw i32 %index, 4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [100 x i64], ptr @g4, i64 0, i64 %idxprom
+ %0 = load i64, ptr %arrayidx, align 8
+ ret i64 %0
+}
+
+define i64 @long_long_load_uw(i32 %index) {
+; RV32I-LABEL: long_long_load_uw:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: lui a1, %hi(g4)
+; RV32I-NEXT: addi a1, a1, %lo(g4)
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: lw a0, 0(a1)
+; RV32I-NEXT: lw a1, 4(a1)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: long_long_load_uw:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g4)
+; RV32ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV32ZBA-NEXT: sh3add a1, a0, a1
+; RV32ZBA-NEXT: lw a0, 0(a1)
+; RV32ZBA-NEXT: lw a1, 4(a1)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: long_long_load_uw:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 29
+; RV64I-NEXT: lui a1, %hi(g4)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g4)
+; RV64I-NEXT: ld a0, %gprel_lo(g4)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: long_long_load_uw:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g4)
+; RV64ZBA-NEXT: sh3add.uw a0, a0, a1, %gprel_shxadd(g4)
+; RV64ZBA-NEXT: ld a0, %gprel_lo(g4)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = zext i32 %index to i64
+ %arrayidx = getelementptr inbounds nuw [100 x i64], ptr @g4, i64 0, i64 %idxprom
+ %0 = load i64, ptr %arrayidx, align 8
+ ret i64 %0
+}
+
+define i64 @add_more_oneuse_load(i32 %index) {
+; RV32I-LABEL: add_more_oneuse_load:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: lui a1, %hi(g4)
+; RV32I-NEXT: addi a1, a1, %lo(g4)
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: lw a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: lw a0, 0(a1)
+; RV32I-NEXT: lw a1, 4(a1)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: add_more_oneuse_load:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g4)
+; RV32ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV32ZBA-NEXT: sh3add a0, a0, a1
+; RV32ZBA-NEXT: lw a0, 0(a0)
+; RV32ZBA-NEXT: sh3add a1, a0, a1
+; RV32ZBA-NEXT: lw a0, 0(a1)
+; RV32ZBA-NEXT: lw a1, 4(a1)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: add_more_oneuse_load:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: lui a1, %hi(g4)
+; RV64I-NEXT: addi a1, a1, %lo(g4)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: add_more_oneuse_load:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: sext.w a0, a0
+; RV64ZBA-NEXT: lui a1, %hi(g4)
+; RV64ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV64ZBA-NEXT: sh3add a0, a0, a1
+; RV64ZBA-NEXT: ld a0, 0(a0)
+; RV64ZBA-NEXT: sh3add a0, a0, a1
+; RV64ZBA-NEXT: ld a0, 0(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i64], ptr @g4, i64 0, i64 %idxprom
+ %0 = load i64, ptr %arrayidx, align 8
+ %arrayidx1 = getelementptr inbounds [100 x i64], ptr @g4, i64 0, i64 %0
+ %1 = load i64, ptr %arrayidx1, align 8
+ ret i64 %1
+}
+
+define i64 @test_nesting_load(i32 %index) {
+; RV32I-LABEL: test_nesting_load:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: lui a1, %hi(g2)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV32I-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV32I-NEXT: lui a1, %hi(g4)
+; RV32I-NEXT: addi a1, a1, %lo(g4)
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: lw a0, 0(a1)
+; RV32I-NEXT: lw a1, 4(a1)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: test_nesting_load:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g2)
+; RV32ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2)
+; RV32ZBA-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV32ZBA-NEXT: lui a1, %hi(g4)
+; RV32ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV32ZBA-NEXT: sh3add a1, a0, a1
+; RV32ZBA-NEXT: lw a0, 0(a1)
+; RV32ZBA-NEXT: lw a1, 4(a1)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: test_nesting_load:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: lui a1, %hi(g2)
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV64I-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: lui a1, %hi(g4)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g4)
+; RV64I-NEXT: ld a0, %gprel_lo(g4)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: test_nesting_load:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g2)
+; RV64ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2)
+; RV64ZBA-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV64ZBA-NEXT: lui a1, %hi(g4)
+; RV64ZBA-NEXT: sh3add a0, a0, a1, %gprel_shxadd(g4)
+; RV64ZBA-NEXT: ld a0, %gprel_lo(g4)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i16], ptr @g2, i64 0, i64 %idxprom
+ %0 = load i16, ptr %arrayidx, align 2
+ %idxprom1 = sext i16 %0 to i64
+ %arrayidx2 = getelementptr inbounds [100 x i64], ptr @g4, i64 0, i64 %idxprom1
+ %1 = load i64, ptr %arrayidx2, align 8
+ ret i64 %1
+}
+
+define void @char_store(i32 %index) {
+; RV32I-LABEL: char_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a1, %hi(g1)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sb a1, %gprel_lo(g1)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: char_store:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g1)
+; RV32ZBA-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sb a1, %gprel_lo(g1)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: char_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a1, %hi(g1)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sb a1, %gprel_lo(g1)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: char_store:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g1)
+; RV64ZBA-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sb a1, %gprel_lo(g1)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i8], ptr @g1, i64 0, i64 %idxprom
+ store i8 100, ptr %arrayidx, align 1
+ ret void
+}
+
+define void @char_store_offset(i32 %index) {
+; RV32I-LABEL: char_store_offset:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a1, %hi(g1+4)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g1+4)
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sb a1, %gprel_lo(g1+4)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: char_store_offset:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g1+4)
+; RV32ZBA-NEXT: add a0, a0, a1, %gprel_add(g1+4)
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sb a1, %gprel_lo(g1+4)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: char_store_offset:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a1, %hi(g1+4)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g1+4)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sb a1, %gprel_lo(g1+4)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: char_store_offset:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g1+4)
+; RV64ZBA-NEXT: add a0, a0, a1, %gprel_add(g1+4)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sb a1, %gprel_lo(g1+4)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %add = add nsw i32 %index, 4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [100 x i8], ptr @g1, i64 0, i64 %idxprom
+ store i8 100, ptr %arrayidx, align 1
+ ret void
+}
+
+define void @char_store_uw(i32 %index) {
+; RV32I-LABEL: char_store_uw:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a1, %hi(g1)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sb a1, %gprel_lo(g1)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: char_store_uw:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g1)
+; RV32ZBA-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sb a1, %gprel_lo(g1)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: char_store_uw:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: lui a1, %hi(g1)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g1)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sb a1, %gprel_lo(g1)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: char_store_uw:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g1)
+; RV64ZBA-NEXT: add.uw a0, a0, a1, %gprel_add(g1)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sb a1, %gprel_lo(g1)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = zext i32 %index to i64
+ %arrayidx = getelementptr inbounds nuw [100 x i8], ptr @g1, i64 0, i64 %idxprom
+ store i8 100, ptr %arrayidx, align 1
+ ret void
+}
+
+define void @short_store(i32 %index) {
+; RV32I-LABEL: short_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: lui a1, %hi(g2)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sh a1, %gprel_lo(g2)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: short_store:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g2)
+; RV32ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2)
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sh a1, %gprel_lo(g2)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: short_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: lui a1, %hi(g2)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sh a1, %gprel_lo(g2)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: short_store:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g2)
+; RV64ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sh a1, %gprel_lo(g2)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i16], ptr @g2, i64 0, i64 %idxprom
+ store i16 100, ptr %arrayidx, align 2
+ ret void
+}
+
+define void @short_store_offset(i32 %index) {
+; RV32I-LABEL: short_store_offset:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: lui a1, %hi(g2+8)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g2+8)
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sh a1, %gprel_lo(g2+8)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: short_store_offset:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g2+8)
+; RV32ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2+8)
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sh a1, %gprel_lo(g2+8)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: short_store_offset:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: lui a1, %hi(g2+8)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g2+8)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sh a1, %gprel_lo(g2+8)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: short_store_offset:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g2+8)
+; RV64ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2+8)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sh a1, %gprel_lo(g2+8)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %add = add nsw i32 %index, 4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [100 x i16], ptr @g2, i64 0, i64 %idxprom
+ store i16 100, ptr %arrayidx, align 2
+ ret void
+}
+
+define void @short_store_uw(i32 %index) {
+; RV32I-LABEL: short_store_uw:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: lui a1, %hi(g2)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sh a1, %gprel_lo(g2)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: short_store_uw:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g2)
+; RV32ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2)
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sh a1, %gprel_lo(g2)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: short_store_uw:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 31
+; RV64I-NEXT: lui a1, %hi(g2)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sh a1, %gprel_lo(g2)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: short_store_uw:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g2)
+; RV64ZBA-NEXT: sh1add.uw a0, a0, a1, %gprel_shxadd(g2)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sh a1, %gprel_lo(g2)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = zext i32 %index to i64
+ %arrayidx = getelementptr inbounds nuw [100 x i16], ptr @g2, i64 0, i64 %idxprom
+ store i16 100, ptr %arrayidx, align 2
+ ret void
+}
+
+define void @int_store(i32 %index) {
+; RV32I-LABEL: int_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: lui a1, %hi(g3)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g3)
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sw a1, %gprel_lo(g3)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: int_store:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g3)
+; RV32ZBA-NEXT: sh2add a0, a0, a1, %gprel_shxadd(g3)
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sw a1, %gprel_lo(g3)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: int_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: lui a1, %hi(g3)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g3)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sw a1, %gprel_lo(g3)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: int_store:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g3)
+; RV64ZBA-NEXT: sh2add a0, a0, a1, %gprel_shxadd(g3)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sw a1, %gprel_lo(g3)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i32], ptr @g3, i64 0, i64 %idxprom
+ store i32 100, ptr %arrayidx, align 4
+ ret void
+}
+
+define void @int_store_offset(i32 %index) {
+; RV32I-LABEL: int_store_offset:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: lui a1, %hi(g3+16)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g3+16)
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sw a1, %gprel_lo(g3+16)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: int_store_offset:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g3+16)
+; RV32ZBA-NEXT: sh2add a0, a0, a1, %gprel_shxadd(g3+16)
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sw a1, %gprel_lo(g3+16)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: int_store_offset:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: lui a1, %hi(g3+16)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g3+16)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sw a1, %gprel_lo(g3+16)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: int_store_offset:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g3+16)
+; RV64ZBA-NEXT: sh2add a0, a0, a1, %gprel_shxadd(g3+16)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sw a1, %gprel_lo(g3+16)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %add = add nsw i32 %index, 4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [100 x i32], ptr @g3, i64 0, i64 %idxprom
+ store i32 100, ptr %arrayidx, align 4
+ ret void
+}
+
+define void @int_store_uw(i32 %index) {
+; RV32I-LABEL: int_store_uw:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: lui a1, %hi(g3)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g3)
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sw a1, %gprel_lo(g3)(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: int_store_uw:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g3)
+; RV32ZBA-NEXT: sh2add a0, a0, a1, %gprel_shxadd(g3)
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sw a1, %gprel_lo(g3)(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: int_store_uw:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 30
+; RV64I-NEXT: lui a1, %hi(g3)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g3)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sw a1, %gprel_lo(g3)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: int_store_uw:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g3)
+; RV64ZBA-NEXT: sh2add.uw a0, a0, a1, %gprel_shxadd(g3)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sw a1, %gprel_lo(g3)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = zext i32 %index to i64
+ %arrayidx = getelementptr inbounds nuw [100 x i32], ptr @g3, i64 0, i64 %idxprom
+ store i32 100, ptr %arrayidx, align 4
+ ret void
+}
+
+define void @long_long_store(i32 %index) {
+; RV32I-LABEL: long_long_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: lui a1, %hi(g4)
+; RV32I-NEXT: addi a1, a1, %lo(g4)
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw zero, 4(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: long_long_store:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g4)
+; RV32ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV32ZBA-NEXT: sh3add a0, a0, a1
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sw a1, 0(a0)
+; RV32ZBA-NEXT: sw zero, 4(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: long_long_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: lui a1, %hi(g4)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g4)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sd a1, %gprel_lo(g4)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: long_long_store:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g4)
+; RV64ZBA-NEXT: sh3add a0, a0, a1, %gprel_shxadd(g4)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sd a1, %gprel_lo(g4)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i64], ptr @g4, i64 0, i64 %idxprom
+ store i64 100, ptr %arrayidx, align 8
+ ret void
+}
+
+define void @long_long_store_offset(i32 %index) {
+; RV32I-LABEL: long_long_store_offset:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: lui a1, %hi(g4)
+; RV32I-NEXT: addi a1, a1, %lo(g4)
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sw a1, 32(a0)
+; RV32I-NEXT: sw zero, 36(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: long_long_store_offset:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g4)
+; RV32ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV32ZBA-NEXT: sh3add a0, a0, a1
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sw a1, 32(a0)
+; RV32ZBA-NEXT: sw zero, 36(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: long_long_store_offset:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: lui a1, %hi(g4+32)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g4+32)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sd a1, %gprel_lo(g4+32)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: long_long_store_offset:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g4+32)
+; RV64ZBA-NEXT: sh3add a0, a0, a1, %gprel_shxadd(g4+32)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sd a1, %gprel_lo(g4+32)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %add = add nsw i32 %index, 4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [100 x i64], ptr @g4, i64 0, i64 %idxprom
+ store i64 100, ptr %arrayidx, align 8
+ ret void
+}
+
+define void @long_long_store_uw(i32 %index) {
+; RV32I-LABEL: long_long_store_uw:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: lui a1, %hi(g4)
+; RV32I-NEXT: addi a1, a1, %lo(g4)
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw zero, 4(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: long_long_store_uw:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g4)
+; RV32ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV32ZBA-NEXT: sh3add a0, a0, a1
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sw a1, 0(a0)
+; RV32ZBA-NEXT: sw zero, 4(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: long_long_store_uw:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 29
+; RV64I-NEXT: lui a1, %hi(g4)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g4)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sd a1, %gprel_lo(g4)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: long_long_store_uw:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g4)
+; RV64ZBA-NEXT: sh3add.uw a0, a0, a1, %gprel_shxadd(g4)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sd a1, %gprel_lo(g4)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = zext i32 %index to i64
+ %arrayidx = getelementptr inbounds nuw [100 x i64], ptr @g4, i64 0, i64 %idxprom
+ store i64 100, ptr %arrayidx, align 8
+ ret void
+}
+
+define void @add_more_oneuse_store(i32 %index) {
+; RV32I-LABEL: add_more_oneuse_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: lui a1, %hi(g4)
+; RV32I-NEXT: addi a1, a1, %lo(g4)
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: lw a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw zero, 4(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: add_more_oneuse_store:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g4)
+; RV32ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV32ZBA-NEXT: sh3add a0, a0, a1
+; RV32ZBA-NEXT: lw a0, 0(a0)
+; RV32ZBA-NEXT: sh3add a0, a0, a1
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sw a1, 0(a0)
+; RV32ZBA-NEXT: sw zero, 4(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: add_more_oneuse_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: lui a1, %hi(g4)
+; RV64I-NEXT: addi a1, a1, %lo(g4)
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sd a1, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: add_more_oneuse_store:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: sext.w a0, a0
+; RV64ZBA-NEXT: lui a1, %hi(g4)
+; RV64ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV64ZBA-NEXT: sh3add a0, a0, a1
+; RV64ZBA-NEXT: ld a0, 0(a0)
+; RV64ZBA-NEXT: sh3add a0, a0, a1
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sd a1, 0(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i64], ptr @g4, i64 0, i64 %idxprom
+ %0 = load i64, ptr %arrayidx, align 8
+ %arrayidx1 = getelementptr inbounds [100 x i64], ptr @g4, i64 0, i64 %0
+ store i64 100, ptr %arrayidx1, align 8
+ ret void
+}
+
+define void @test_nesting_store(i32 %index) {
+; RV32I-LABEL: test_nesting_store:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: lui a1, %hi(g2)
+; RV32I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV32I-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV32I-NEXT: lui a1, %hi(g4)
+; RV32I-NEXT: addi a1, a1, %lo(g4)
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: li a1, 100
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw zero, 4(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBA-LABEL: test_nesting_store:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: lui a1, %hi(g2)
+; RV32ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2)
+; RV32ZBA-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV32ZBA-NEXT: lui a1, %hi(g4)
+; RV32ZBA-NEXT: addi a1, a1, %lo(g4)
+; RV32ZBA-NEXT: sh3add a0, a0, a1
+; RV32ZBA-NEXT: li a1, 100
+; RV32ZBA-NEXT: sw a1, 0(a0)
+; RV32ZBA-NEXT: sw zero, 4(a0)
+; RV32ZBA-NEXT: ret
+;
+; RV64I-LABEL: test_nesting_store:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: lui a1, %hi(g2)
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g2)
+; RV64I-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: lui a1, %hi(g4)
+; RV64I-NEXT: add a0, a0, a1, %gprel_add(g4)
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: sd a1, %gprel_lo(g4)(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: test_nesting_store:
+; RV64ZBA: # %bb.0: # %entry
+; RV64ZBA-NEXT: lui a1, %hi(g2)
+; RV64ZBA-NEXT: sh1add a0, a0, a1, %gprel_shxadd(g2)
+; RV64ZBA-NEXT: lh a0, %gprel_lo(g2)(a0)
+; RV64ZBA-NEXT: lui a1, %hi(g4)
+; RV64ZBA-NEXT: sh3add a0, a0, a1, %gprel_shxadd(g4)
+; RV64ZBA-NEXT: li a1, 100
+; RV64ZBA-NEXT: sd a1, %gprel_lo(g4)(a0)
+; RV64ZBA-NEXT: ret
+entry:
+ %idxprom = sext i32 %index to i64
+ %arrayidx = getelementptr inbounds [100 x i16], ptr @g2, i64 0, i64 %idxprom
+ %0 = load i16, ptr %arrayidx, align 2
+ %idxprom1 = sext i16 %0 to i64
+ %arrayidx2 = getelementptr inbounds [100 x i64], ptr @g4, i64 0, i64 %idxprom1
+ store i64 100, ptr %arrayidx2, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
index 2ec8b46cf3eea..adb982ea92a8d 100644
--- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
@@ -38,13 +38,12 @@ define void @test(i32 signext %i) nounwind {
; RV32-NEXT: blt a3, a1, .LBB0_3
; RV32-NEXT: # %bb.1: # %bb.preheader
; RV32-NEXT: lui a2, %hi(flags2)
-; RV32-NEXT: addi a2, a2, %lo(flags2)
; RV32-NEXT: addi a3, a3, 1
; RV32-NEXT: .LBB0_2: # %bb
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: add a4, a2, a1
+; RV32-NEXT: add a4, a1, a2, %gprel_add(flags2)
; RV32-NEXT: add a1, a1, a0
-; RV32-NEXT: sb zero, 0(a4)
+; RV32-NEXT: sb zero, %gprel_lo(flags2)(a4)
; RV32-NEXT: blt a1, a3, .LBB0_2
; RV32-NEXT: .LBB0_3: # %return
; RV32-NEXT: ret
@@ -56,15 +55,14 @@ define void @test(i32 signext %i) nounwind {
; RV64-NEXT: blt a3, a1, .LBB0_3
; RV64-NEXT: # %bb.1: # %bb.preheader
; RV64-NEXT: lui a2, %hi(flags2)
-; RV64-NEXT: addi a2, a2, %lo(flags2)
; RV64-NEXT: addi a3, a3, 1
; RV64-NEXT: .LBB0_2: # %bb
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: srli a4, a4, 32
-; RV64-NEXT: add a4, a2, a4
+; RV64-NEXT: add a4, a4, a2, %gprel_add(flags2)
; RV64-NEXT: addw a1, a1, a0
-; RV64-NEXT: sb zero, 0(a4)
+; RV64-NEXT: sb zero, %gprel_lo(flags2)(a4)
; RV64-NEXT: blt a1, a3, .LBB0_2
; RV64-NEXT: .LBB0_3: # %return
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/lpad.ll b/llvm/test/CodeGen/RISCV/lpad.ll
index 28873ab6c49a4..4362d648246a4 100644
--- a/llvm/test/CodeGen/RISCV/lpad.ll
+++ b/llvm/test/CodeGen/RISCV/lpad.ll
@@ -14,9 +14,8 @@ define void @indirctbr(i32 %i, ptr %p) {
; RV32-NEXT: lpad 0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: lui a2, %hi(.L__const.indirctbr.addr)
-; RV32-NEXT: addi a2, a2, %lo(.L__const.indirctbr.addr)
-; RV32-NEXT: add a0, a2, a0
-; RV32-NEXT: lw a0, 0(a0)
+; RV32-NEXT: add a0, a0, a2, %gprel_add(.L__const.indirctbr.addr)
+; RV32-NEXT: lw a0, %gprel_lo(.L__const.indirctbr.addr)(a0)
; RV32-NEXT: jr a0
; RV32-NEXT: .p2align 2
; RV32-NEXT: .Ltmp3: # Block address taken
@@ -38,9 +37,8 @@ define void @indirctbr(i32 %i, ptr %p) {
; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: lui a2, %hi(.L__const.indirctbr.addr)
-; RV64-NEXT: addi a2, a2, %lo(.L__const.indirctbr.addr)
-; RV64-NEXT: add a0, a2, a0
-; RV64-NEXT: ld a0, 0(a0)
+; RV64-NEXT: add a0, a0, a2, %gprel_add(.L__const.indirctbr.addr)
+; RV64-NEXT: ld a0, %gprel_lo(.L__const.indirctbr.addr)(a0)
; RV64-NEXT: jr a0
; RV64-NEXT: .p2align 2
; RV64-NEXT: .Ltmp3: # Block address taken
@@ -61,9 +59,8 @@ define void @indirctbr(i32 %i, ptr %p) {
; FIXED-ONE-RV32-NEXT: lpad 1
; FIXED-ONE-RV32-NEXT: slli a0, a0, 2
; FIXED-ONE-RV32-NEXT: lui a2, %hi(.L__const.indirctbr.addr)
-; FIXED-ONE-RV32-NEXT: addi a2, a2, %lo(.L__const.indirctbr.addr)
-; FIXED-ONE-RV32-NEXT: add a0, a2, a0
-; FIXED-ONE-RV32-NEXT: lw a0, 0(a0)
+; FIXED-ONE-RV32-NEXT: add a0, a0, a2, %gprel_add(.L__const.indirctbr.addr)
+; FIXED-ONE-RV32-NEXT: lw a0, %gprel_lo(.L__const.indirctbr.addr)(a0)
; FIXED-ONE-RV32-NEXT: lui t2, 1
; FIXED-ONE-RV32-NEXT: jr a0
; FIXED-ONE-RV32-NEXT: .p2align 2
@@ -86,9 +83,8 @@ define void @indirctbr(i32 %i, ptr %p) {
; FIXED-ONE-RV64-NEXT: sext.w a0, a0
; FIXED-ONE-RV64-NEXT: slli a0, a0, 3
; FIXED-ONE-RV64-NEXT: lui a2, %hi(.L__const.indirctbr.addr)
-; FIXED-ONE-RV64-NEXT: addi a2, a2, %lo(.L__const.indirctbr.addr)
-; FIXED-ONE-RV64-NEXT: add a0, a2, a0
-; FIXED-ONE-RV64-NEXT: ld a0, 0(a0)
+; FIXED-ONE-RV64-NEXT: add a0, a0, a2, %gprel_add(.L__const.indirctbr.addr)
+; FIXED-ONE-RV64-NEXT: ld a0, %gprel_lo(.L__const.indirctbr.addr)(a0)
; FIXED-ONE-RV64-NEXT: lui t2, 1
; FIXED-ONE-RV64-NEXT: jr a0
; FIXED-ONE-RV64-NEXT: .p2align 2
diff --git a/llvm/test/MC/RISCV/Relocations/relocations.s b/llvm/test/MC/RISCV/Relocations/relocations.s
index 42cdfe338fe54..ac80ece513de2 100644
--- a/llvm/test/MC/RISCV/Relocations/relocations.s
+++ b/llvm/test/MC/RISCV/Relocations/relocations.s
@@ -51,6 +51,14 @@ addi t1, t1, %tprel_lo(foo+4)
# RELOC: R_RISCV_TPREL_LO12_I foo 0x4
# INSTR: addi t1, t1, %tprel_lo(foo+4)
+addi t1, t1, %gprel_lo(foo)
+# RELOC: R_RISCV_GPREL_LO12_I foo 0x0
+# INSTR: addi t1, t1, %gprel_lo(foo)
+
+addi t1, t1, %gprel_lo(foo+4)
+# RELOC: R_RISCV_GPREL_LO12_I foo 0x4
+# INSTR: addi t1, t1, %gprel_lo(foo+4)
+
sb t1, %lo(foo)(a2)
# RELOC: R_RISCV_LO12_S foo 0x0
# INSTR: sb t1, %lo(foo)(a2)
@@ -67,6 +75,14 @@ sb t1, %tprel_lo(foo+4)(a2)
# RELOC: R_RISCV_TPREL_LO12_S foo 0x4
# INSTR: sb t1, %tprel_lo(foo+4)(a2)
+sb t1, %gprel_lo(foo)(a2)
+# RELOC: R_RISCV_GPREL_LO12_S foo 0x0
+# INSTR: sb t1, %gprel_lo(foo)(a2)
+
+sb t1, %gprel_lo(foo+4)(a2)
+# RELOC: R_RISCV_GPREL_LO12_S foo 0x4
+# INSTR: sb t1, %gprel_lo(foo+4)(a2)
+
.L0:
auipc t1, %pcrel_hi(foo)
# RELOC: R_RISCV_PCREL_HI20 foo 0x0
@@ -142,6 +158,38 @@ add t1, t1, tp, %tprel_add(foo)
# RELOC: R_RISCV_TPREL_ADD foo 0x0
# INSTR: add t1, t1, tp, %tprel_add(foo)
+add t1, t1, t2, %gprel_add(foo)
+# RELOC: R_RISCV_GPREL_ADD foo 0x0
+# INSTR: add t1, t1, t2, %gprel_add(foo)
+
+add.uw t1, t1, t2, %gprel_add(foo)
+# RELOC: R_RISCV_GPREL_ADD foo 0x0
+# INSTR: add.uw t1, t1, t2, %gprel_add(foo)
+
+sh1add t1, t1, t2, %gprel_shxadd(foo)
+# RELOC: R_RISCV_GPREL_SHXADD foo 0x0
+# INSTR: sh1add t1, t1, t2, %gprel_shxadd(foo)
+
+sh1add.uw t1, t1, t2, %gprel_shxadd(foo)
+# RELOC: R_RISCV_GPREL_SHXADD foo 0x0
+# INSTR: sh1add.uw t1, t1, t2, %gprel_shxadd(foo)
+
+sh2add t1, t1, t2, %gprel_shxadd(foo)
+# RELOC: R_RISCV_GPREL_SHXADD foo 0x0
+# INSTR: sh2add t1, t1, t2, %gprel_shxadd(foo)
+
+sh2add.uw t1, t1, t2, %gprel_shxadd(foo)
+# RELOC: R_RISCV_GPREL_SHXADD foo 0x0
+# INSTR: sh2add.uw t1, t1, t2, %gprel_shxadd(foo)
+
+sh3add t1, t1, t2, %gprel_shxadd(foo)
+# RELOC: R_RISCV_GPREL_SHXADD foo 0x0
+# INSTR: sh3add t1, t1, t2, %gprel_shxadd(foo)
+
+sh3add.uw t1, t1, t2, %gprel_shxadd(foo)
+# RELOC: R_RISCV_GPREL_SHXADD foo 0x0
+# INSTR: sh3add.uw t1, t1, t2, %gprel_shxadd(foo)
+
jal zero, foo
# RELOC: R_RISCV_JAL
# INSTR: jal zero, foo
diff --git a/llvm/test/MC/RISCV/rv32zba-invalid.s b/llvm/test/MC/RISCV/rv32zba-invalid.s
index 0efc958c98e01..aa194cb0f5ac1 100644
--- a/llvm/test/MC/RISCV/rv32zba-invalid.s
+++ b/llvm/test/MC/RISCV/rv32zba-invalid.s
@@ -11,3 +11,8 @@ add.uw t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the followi
sh1add.uw t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
sh2add.uw t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
sh3add.uw t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
+
+# Symbol operands on shXadd require a %gprel_add or %gprel_shxadd specifier.
+sh1add a0, a0, a1, %hi(foo) # CHECK: :[[@LINE]]:20: error: operand must be a symbol with %gprel_add or %gprel_shxadd specifier
+sh2add a0, a0, a1, %hi(foo) # CHECK: :[[@LINE]]:20: error: operand must be a symbol with %gprel_add or %gprel_shxadd specifier
+sh3add a0, a0, a1, %hi(foo) # CHECK: :[[@LINE]]:20: error: operand must be a symbol with %gprel_add or %gprel_shxadd specifier
diff --git a/llvm/test/MC/RISCV/rv64zba-invalid.s b/llvm/test/MC/RISCV/rv64zba-invalid.s
index 358d8b3a6e11f..02da19e66f1a2 100644
--- a/llvm/test/MC/RISCV/rv64zba-invalid.s
+++ b/llvm/test/MC/RISCV/rv64zba-invalid.s
@@ -13,3 +13,12 @@ sh1add.uw t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
sh2add.uw t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
# Too few operands
sh3add.uw t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
+
+# Symbol operands on shXadd/add.uw require a %gprel_add or %gprel_shxadd specifier.
+sh1add a0, a0, a1, %hi(foo) # CHECK: :[[@LINE]]:20: error: operand must be a symbol with %gprel_add or %gprel_shxadd specifier
+sh2add a0, a0, a1, %hi(foo) # CHECK: :[[@LINE]]:20: error: operand must be a symbol with %gprel_add or %gprel_shxadd specifier
+sh3add a0, a0, a1, %hi(foo) # CHECK: :[[@LINE]]:20: error: operand must be a symbol with %gprel_add or %gprel_shxadd specifier
+add.uw a0, a0, a1, %hi(foo) # CHECK: :[[@LINE]]:20: error: operand must be a symbol with %gprel_add or %gprel_shxadd specifier
+sh1add.uw a0, a0, a1, %hi(foo) # CHECK: :[[@LINE]]:23: error: operand must be a symbol with %gprel_add or %gprel_shxadd specifier
+sh2add.uw a0, a0, a1, %hi(foo) # CHECK: :[[@LINE]]:23: error: operand must be a symbol with %gprel_add or %gprel_shxadd specifier
+sh3add.uw a0, a0, a1, %hi(foo) # CHECK: :[[@LINE]]:23: error: operand must be a symbol with %gprel_add or %gprel_shxadd specifier
diff --git a/llvm/test/MC/RISCV/rv64zbb-invalid.s b/llvm/test/MC/RISCV/rv64zbb-invalid.s
index c4e5bacb7c39e..d30a617cb1472 100644
--- a/llvm/test/MC/RISCV/rv64zbb-invalid.s
+++ b/llvm/test/MC/RISCV/rv64zbb-invalid.s
@@ -16,4 +16,4 @@ roriw t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
roriw t0, t1, 32 # CHECK: :[[@LINE]]:15: error: immediate must be an integer in the range [0, 31]
roriw t0, t1, -1 # CHECK: :[[@LINE]]:15: error: immediate must be an integer in the range [0, 31]
rori t0, t1, 64 # CHECK: :[[@LINE]]:14: error: immediate must be an integer in the range [0, 63]
-rori t0, t1, -1 # CHECK: :[[@LINE]]:14: error: immediate must be an integer in the range [0, 63]
+rori t0, t1, -1 # CHECK: :[[@LINE]]:14: error: immediate must be an integer in the range [0, 63]
\ No newline at end of file
More information about the llvm-commits
mailing list