[clang] 3ac9fe6 - [RISCV] CodeGen of RVE and ilp32e/lp64e ABIs (#76777)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 16 04:44:52 PST 2024
Author: Wang Pengcheng
Date: 2024-01-16T20:44:30+08:00
New Revision: 3ac9fe69f70a2b3541266daedbaaa7dc9c007a2a
URL: https://github.com/llvm/llvm-project/commit/3ac9fe69f70a2b3541266daedbaaa7dc9c007a2a
DIFF: https://github.com/llvm/llvm-project/commit/3ac9fe69f70a2b3541266daedbaaa7dc9c007a2a.diff
LOG: [RISCV] CodeGen of RVE and ilp32e/lp64e ABIs (#76777)
This commit includes the necessary changes to clang and LLVM to support
codegen of `RVE` and the `ilp32e`/`lp64e` ABIs.
The differences between `RVE` and `RVI` are:
* `RVE` reduces the integer register count to 16 (x0-x15).
* The ABI should be `ilp32e` for 32 bits and `lp64e` for 64 bits.
`RVE` can be combined with all current standard extensions.
The central changes in ilp32e/lp64e ABI, compared to ilp32/lp64 are:
* Only 6 integer argument registers (rather than 8).
* Only 2 callee-saved registers (rather than 12).
* A stack alignment of 32 bits for ilp32e and 64 bits for lp64e (rather than 128 bits).
* ilp32e isn't compatible with the D ISA extension.
If `ilp32e` or `lp64e` is used with an ISA that has any of the registers
x16-x31 and f0-f31, then these registers are considered temporaries.
To be compatible with the implementation of ilp32e in GCC, we don't use
aligned registers to pass variadic arguments and set stack alignment
to 4 bytes for types with length of 2*XLEN.
FastCC is also supported on RVE, while GHC isn't, since there is only one
available register.
Differential Revision: https://reviews.llvm.org/D70401
Added:
clang/test/CodeGen/RISCV/riscv32-ilp32e-error.c
llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll
llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll
llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll
llvm/test/CodeGen/RISCV/rv32e.ll
llvm/test/CodeGen/RISCV/rv64e.ll
llvm/test/CodeGen/RISCV/vararg-ilp32e.ll
Modified:
clang/docs/ReleaseNotes.rst
clang/lib/Basic/Targets/RISCV.cpp
clang/lib/Basic/Targets/RISCV.h
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/TargetInfo.h
clang/lib/CodeGen/Targets/RISCV.cpp
clang/lib/Driver/ToolChains/Arch/RISCV.cpp
clang/test/CodeGen/RISCV/riscv32-abi.c
clang/test/CodeGen/RISCV/riscv32-vararg.c
clang/test/CodeGen/RISCV/riscv64-abi.c
clang/test/CodeGen/RISCV/riscv64-vararg.c
clang/test/Preprocessor/riscv-target-features.c
llvm/docs/RISCVUsage.rst
llvm/docs/ReleaseNotes.rst
llvm/include/llvm/Support/RISCVAttributes.h
llvm/lib/Support/RISCVISAInfo.cpp
llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
llvm/lib/Target/RISCV/RISCVCallingConv.td
llvm/lib/Target/RISCV/RISCVFeatures.td
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
llvm/lib/Target/RISCV/RISCVFrameLowering.h
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
llvm/test/CodeGen/RISCV/interrupt-attr.ll
llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
llvm/test/CodeGen/RISCV/stack-realignment.ll
llvm/test/CodeGen/RISCV/target-abi-valid.ll
llvm/test/CodeGen/RISCV/vararg.ll
llvm/test/MC/RISCV/option-invalid.s
llvm/test/MC/RISCV/target-abi-invalid.s
Removed:
llvm/test/CodeGen/RISCV/rve.ll
################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ea57769a4a5795d..c2440bc4651819b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1060,6 +1060,8 @@ RISC-V Support
^^^^^^^^^^^^^^
- Unaligned memory accesses can be toggled by ``-m[no-]unaligned-access`` or the
aliases ``-m[no-]strict-align``.
+- CodeGen of RV32E/RV64E was supported experimentally.
+- CodeGen of ilp32e/lp64e was supported experimentally.
- Default ABI with F but without D was changed to ilp32f for RV32 and to lp64f
for RV64.
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index fb312b6cf26e02a..942e61b05cda528 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -154,7 +154,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
else
Builder.defineMacro("__riscv_float_abi_soft");
- if (ABIName == "ilp32e")
+ if (ABIName == "ilp32e" || ABIName == "lp64e")
Builder.defineMacro("__riscv_abi_rve");
Builder.defineMacro("__riscv_arch_test");
@@ -214,6 +214,13 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__riscv_misaligned_fast");
else
Builder.defineMacro("__riscv_misaligned_avoid");
+
+ if (ISAInfo->hasExtension("e")) {
+ if (Is64Bit)
+ Builder.defineMacro("__riscv_64e");
+ else
+ Builder.defineMacro("__riscv_32e");
+ }
}
static constexpr Builtin::Info BuiltinInfo[] = {
@@ -378,6 +385,11 @@ bool RISCVTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
if (llvm::is_contained(Features, "+experimental"))
HasExperimental = true;
+ if (ABI == "ilp32e" && ISAInfo->hasExtension("d")) {
+ Diags.Report(diag::err_invalid_feature_combination)
+ << "ILP32E cannot be used with the D ISA extension";
+ return false;
+ }
return true;
}
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index f98c88cd45f8310..bfbdafb682c8513 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -132,6 +132,12 @@ class LLVM_LIBRARY_VISIBILITY RISCV32TargetInfo : public RISCVTargetInfo {
}
bool setABI(const std::string &Name) override {
+ if (Name == "ilp32e") {
+ ABI = Name;
+ resetDataLayout("e-m:e-p:32:32-i64:64-n32-S32");
+ return true;
+ }
+
if (Name == "ilp32" || Name == "ilp32f" || Name == "ilp32d") {
ABI = Name;
return true;
@@ -156,6 +162,12 @@ class LLVM_LIBRARY_VISIBILITY RISCV64TargetInfo : public RISCVTargetInfo {
}
bool setABI(const std::string &Name) override {
+ if (Name == "lp64e") {
+ ABI = Name;
+ resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n32:64-S64");
+ return true;
+ }
+
if (Name == "lp64" || Name == "lp64f" || Name == "lp64d") {
ABI = Name;
return true;
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 01b042ce5dd1343..482c2108a988a16 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -229,7 +229,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
ABIFLen = 32;
else if (ABIStr.ends_with("d"))
ABIFLen = 64;
- return createRISCVTargetCodeGenInfo(CGM, XLen, ABIFLen);
+ bool EABI = ABIStr.ends_with("e");
+ return createRISCVTargetCodeGenInfo(CGM, XLen, ABIFLen, EABI);
}
case llvm::Triple::systemz: {
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 0c0781a2d5ab9df..7682f197041c748 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -496,7 +496,8 @@ createPPC64_SVR4_TargetCodeGenInfo(CodeGenModule &CGM, PPC64_SVR4_ABIKind Kind,
bool SoftFloatABI);
std::unique_ptr<TargetCodeGenInfo>
-createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen, unsigned FLen);
+createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen, unsigned FLen,
+ bool EABI);
std::unique_ptr<TargetCodeGenInfo>
createCommonSPIRTargetCodeGenInfo(CodeGenModule &CGM);
diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp
index 1e1d249b37ac060..0851d1993d0c0f5 100644
--- a/clang/lib/CodeGen/Targets/RISCV.cpp
+++ b/clang/lib/CodeGen/Targets/RISCV.cpp
@@ -25,8 +25,9 @@ class RISCVABIInfo : public DefaultABIInfo {
// ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target
// with soft float ABI has FLen==0).
unsigned FLen;
- static const int NumArgGPRs = 8;
- static const int NumArgFPRs = 8;
+ const int NumArgGPRs;
+ const int NumArgFPRs;
+ const bool EABI;
bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
llvm::Type *&Field1Ty,
CharUnits &Field1Off,
@@ -34,8 +35,10 @@ class RISCVABIInfo : public DefaultABIInfo {
CharUnits &Field2Off) const;
public:
- RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen)
- : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {}
+ RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen,
+ bool EABI)
+ : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen), NumArgGPRs(EABI ? 6 : 8),
+ NumArgFPRs(FLen != 0 ? 8 : 0), EABI(EABI) {}
// DefaultABIInfo's classifyReturnType and classifyArgumentType are
// non-virtual, but computeInfo is virtual, so we overload it.
@@ -86,7 +89,7 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
}
int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
- int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
+ int ArgFPRsLeft = NumArgFPRs;
int NumFixedArgs = FI.getNumRequiredArgs();
int ArgNum = 0;
@@ -396,9 +399,12 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
// Determine the number of GPRs needed to pass the current argument
// according to the ABI. 2*XLen-aligned varargs are passed in "aligned"
// register pairs, so may consume 3 registers.
+ // TODO: To be compatible with GCC's behaviors, we don't align registers
+ // currently if we are using ILP32E calling convention. This behavior may be
+ // changed when RV32E/ILP32E is ratified.
int NeededArgGPRs = 1;
if (!IsFixed && NeededAlign == 2 * XLen)
- NeededArgGPRs = 2 + (ArgGPRsLeft % 2);
+ NeededArgGPRs = 2 + (EABI && XLen == 32 ? 0 : (ArgGPRsLeft % 2));
else if (Size > XLen && Size <= 2 * XLen)
NeededArgGPRs = 2;
@@ -480,6 +486,13 @@ Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
auto TInfo = getContext().getTypeInfoInChars(Ty);
+ // TODO: To be compatible with GCC's behaviors, we force arguments with
+ // 2×XLEN-bit alignment and size at most 2×XLEN bits like `long long`,
+ // `unsigned long long` and `double` to have 4-byte alignment. This
+ // behavior may be changed when RV32E/ILP32E is ratified.
+ if (EABI && XLen == 32)
+ TInfo.Align = std::min(TInfo.Align, CharUnits::fromQuantity(4));
+
// Arguments bigger than 2*Xlen bytes are passed indirectly.
bool IsIndirect = TInfo.Width > 2 * SlotSize;
@@ -499,8 +512,9 @@ namespace {
class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
public:
RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen,
- unsigned FLen)
- : TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {}
+ unsigned FLen, bool EABI)
+ : TargetCodeGenInfo(
+ std::make_unique<RISCVABIInfo>(CGT, XLen, FLen, EABI)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
@@ -526,6 +540,7 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen,
- unsigned FLen) {
- return std::make_unique<RISCVTargetCodeGenInfo>(CGM.getTypes(), XLen, FLen);
+ unsigned FLen, bool EABI) {
+ return std::make_unique<RISCVTargetCodeGenInfo>(CGM.getTypes(), XLen, FLen,
+ EABI);
}
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index 16a8b3cc42bab4c..a46b44f9ad2b2dd 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -210,6 +210,7 @@ StringRef riscv::getRISCVABI(const ArgList &Args, const llvm::Triple &Triple) {
// rv32e -> ilp32e
// rv32* -> ilp32
// rv64g | rv64*d -> lp64d
+ // rv64e -> lp64e
// rv64* -> lp64
StringRef Arch = getRISCVArch(Args, Triple);
@@ -285,6 +286,7 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args,
// 3. Choose a default based on `-mabi=`
//
// ilp32e -> rv32e
+ // lp64e -> rv64e
// ilp32 | ilp32f | ilp32d -> rv32imafdc
// lp64 | lp64f | lp64d -> rv64imafdc
if (const Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
@@ -292,6 +294,8 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args,
if (MABI.equals_insensitive("ilp32e"))
return "rv32e";
+ else if (MABI.equals_insensitive("lp64e"))
+ return "rv64e";
else if (MABI.starts_with_insensitive("ilp32"))
return "rv32imafdc";
else if (MABI.starts_with_insensitive("lp64")) {
diff --git a/clang/test/CodeGen/RISCV/riscv32-abi.c b/clang/test/CodeGen/RISCV/riscv32-abi.c
index ea1bb3b62ee6fd0..b53f9a9169146b2 100644
--- a/clang/test/CodeGen/RISCV/riscv32-abi.c
+++ b/clang/test/CodeGen/RISCV/riscv32-abi.c
@@ -5,6 +5,8 @@
// RUN: | FileCheck -check-prefixes=ILP32-ILP32F-ILP32D,ILP32F-ILP32D,ILP32-ILP32F,ILP32F %s
// RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d -target-abi ilp32d -emit-llvm %s -o - \
// RUN: | FileCheck -check-prefixes=ILP32-ILP32F-ILP32D,ILP32F-ILP32D,ILP32D %s
+// RUN: %clang_cc1 -triple riscv32 -emit-llvm -target-abi ilp32e %s -o - \
+// RUN: | FileCheck -check-prefixes=ILP32-ILP32F-ILP32D,ILP32-ILP32F,ILP32,ILP32E %s
#include <stddef.h>
#include <stdint.h>
@@ -2064,4 +2066,5 @@ union float16_u f_ret_float16_u(void) {
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// ILP32E: {{.*}}
// ILP32F: {{.*}}
diff --git a/clang/test/CodeGen/RISCV/riscv32-ilp32e-error.c b/clang/test/CodeGen/RISCV/riscv32-ilp32e-error.c
new file mode 100644
index 000000000000000..0afe7b025efdbc8
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/riscv32-ilp32e-error.c
@@ -0,0 +1,4 @@
+// RUN: not %clang_cc1 -triple riscv32 -target-feature +d -emit-llvm -target-abi ilp32e %s 2>&1 \
+// RUN: | FileCheck -check-prefix=ILP32E-WITH-FD %s
+
+// ILP32E-WITH-FD: error: invalid feature combination: ILP32E cannot be used with the D ISA extension
diff --git a/clang/test/CodeGen/RISCV/riscv32-vararg.c b/clang/test/CodeGen/RISCV/riscv32-vararg.c
index 02b1ed38e265567..1c4e41f2f54c8f7 100644
--- a/clang/test/CodeGen/RISCV/riscv32-vararg.c
+++ b/clang/test/CodeGen/RISCV/riscv32-vararg.c
@@ -1,9 +1,11 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-abi ilp32f -emit-llvm %s -o - \
-// RUN: | FileCheck %s
+// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-ILP32F
// RUN: %clang_cc1 -triple riscv32 -target-feature +d -target-feature +f -target-abi ilp32d -emit-llvm %s -o - \
-// RUN: | FileCheck %s
+// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-ILP32D
+// RUN: %clang_cc1 -triple riscv32 -target-abi ilp32e -emit-llvm %s -o - \
+// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-ILP32E
#include <stddef.h>
#include <stdint.h>
@@ -102,24 +104,60 @@ int f_va_1(char *fmt, ...) {
// used to pass varargs with 2x xlen alignment and 2x xlen size. Ensure the
// correct offsets are used.
-// CHECK-LABEL: define dso_local double @f_va_2
-// CHECK-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[V:%.*]] = alloca double, align 8
-// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
-// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
-// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
-// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
-// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
-// CHECK-NEXT: store double [[TMP1]], ptr [[V]], align 8
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
-// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8
-// CHECK-NEXT: ret double [[TMP2]]
+// CHECK-ILP32F-LABEL: define dso_local double @f_va_2
+// CHECK-ILP32F-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
+// CHECK-ILP32F-NEXT: entry:
+// CHECK-ILP32F-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4
+// CHECK-ILP32F-NEXT: [[V:%.*]] = alloca double, align 8
+// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
+// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-ILP32F-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-ILP32F-NEXT: store double [[TMP1]], ptr [[V]], align 8
+// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8
+// CHECK-ILP32F-NEXT: ret double [[TMP2]]
+//
+// CHECK-ILP32D-LABEL: define dso_local double @f_va_2
+// CHECK-ILP32D-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
+// CHECK-ILP32D-NEXT: entry:
+// CHECK-ILP32D-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4
+// CHECK-ILP32D-NEXT: [[V:%.*]] = alloca double, align 8
+// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
+// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-ILP32D-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-ILP32D-NEXT: store double [[TMP1]], ptr [[V]], align 8
+// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8
+// CHECK-ILP32D-NEXT: ret double [[TMP2]]
+//
+// CHECK-ILP32E-LABEL: define dso_local double @f_va_2
+// CHECK-ILP32E-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
+// CHECK-ILP32E-NEXT: entry:
+// CHECK-ILP32E-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4
+// CHECK-ILP32E-NEXT: [[V:%.*]] = alloca double, align 8
+// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
+// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8
+// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load double, ptr [[ARGP_CUR]], align 4
+// CHECK-ILP32E-NEXT: store double [[TMP0]], ptr [[V]], align 8
+// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load double, ptr [[V]], align 8
+// CHECK-ILP32E-NEXT: ret double [[TMP1]]
//
double f_va_2(char *fmt, ...) {
__builtin_va_list va;
@@ -133,40 +171,106 @@ double f_va_2(char *fmt, ...) {
// Two "aligned" register pairs.
-// CHECK-LABEL: define dso_local double @f_va_3
-// CHECK-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[V:%.*]] = alloca double, align 8
-// CHECK-NEXT: [[W:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[X:%.*]] = alloca double, align 8
-// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
-// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
-// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
-// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
-// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
-// CHECK-NEXT: store double [[TMP1]], ptr [[V]], align 8
-// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4
-// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
-// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
-// CHECK-NEXT: store i32 [[TMP2]], ptr [[W]], align 4
-// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 7
-// CHECK-NEXT: [[ARGP_CUR3_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP3]], i32 -8)
-// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3_ALIGNED]], i32 8
-// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8
-// CHECK-NEXT: store double [[TMP4]], ptr [[X]], align 8
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
-// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8
-// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]]
-// CHECK-NEXT: ret double [[ADD]]
+// CHECK-ILP32F-LABEL: define dso_local double @f_va_3
+// CHECK-ILP32F-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
+// CHECK-ILP32F-NEXT: entry:
+// CHECK-ILP32F-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4
+// CHECK-ILP32F-NEXT: [[V:%.*]] = alloca double, align 8
+// CHECK-ILP32F-NEXT: [[W:%.*]] = alloca i32, align 4
+// CHECK-ILP32F-NEXT: [[X:%.*]] = alloca double, align 8
+// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
+// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-ILP32F-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-ILP32F-NEXT: store double [[TMP1]], ptr [[V]], align 8
+// CHECK-ILP32F-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-ILP32F-NEXT: store i32 [[TMP2]], ptr [[W]], align 4
+// CHECK-ILP32F-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 7
+// CHECK-ILP32F-NEXT: [[ARGP_CUR3_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP3]], i32 -8)
+// CHECK-ILP32F-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3_ALIGNED]], i32 8
+// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8
+// CHECK-ILP32F-NEXT: store double [[TMP4]], ptr [[X]], align 8
+// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32F-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8
+// CHECK-ILP32F-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8
+// CHECK-ILP32F-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]]
+// CHECK-ILP32F-NEXT: ret double [[ADD]]
+//
+// CHECK-ILP32D-LABEL: define dso_local double @f_va_3
+// CHECK-ILP32D-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
+// CHECK-ILP32D-NEXT: entry:
+// CHECK-ILP32D-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4
+// CHECK-ILP32D-NEXT: [[V:%.*]] = alloca double, align 8
+// CHECK-ILP32D-NEXT: [[W:%.*]] = alloca i32, align 4
+// CHECK-ILP32D-NEXT: [[X:%.*]] = alloca double, align 8
+// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
+// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-ILP32D-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-ILP32D-NEXT: store double [[TMP1]], ptr [[V]], align 8
+// CHECK-ILP32D-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-ILP32D-NEXT: store i32 [[TMP2]], ptr [[W]], align 4
+// CHECK-ILP32D-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 7
+// CHECK-ILP32D-NEXT: [[ARGP_CUR3_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP3]], i32 -8)
+// CHECK-ILP32D-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3_ALIGNED]], i32 8
+// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8
+// CHECK-ILP32D-NEXT: store double [[TMP4]], ptr [[X]], align 8
+// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32D-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8
+// CHECK-ILP32D-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8
+// CHECK-ILP32D-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]]
+// CHECK-ILP32D-NEXT: ret double [[ADD]]
+//
+// CHECK-ILP32E-LABEL: define dso_local double @f_va_3
+// CHECK-ILP32E-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
+// CHECK-ILP32E-NEXT: entry:
+// CHECK-ILP32E-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4
+// CHECK-ILP32E-NEXT: [[V:%.*]] = alloca double, align 8
+// CHECK-ILP32E-NEXT: [[W:%.*]] = alloca i32, align 4
+// CHECK-ILP32E-NEXT: [[X:%.*]] = alloca double, align 8
+// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
+// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8
+// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load double, ptr [[ARGP_CUR]], align 4
+// CHECK-ILP32E-NEXT: store double [[TMP0]], ptr [[V]], align 8
+// CHECK-ILP32E-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-ILP32E-NEXT: store i32 [[TMP1]], ptr [[W]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 8
+// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[TMP2:%.*]] = load double, ptr [[ARGP_CUR3]], align 4
+// CHECK-ILP32E-NEXT: store double [[TMP2]], ptr [[X]], align 8
+// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32E-NEXT: [[TMP3:%.*]] = load double, ptr [[V]], align 8
+// CHECK-ILP32E-NEXT: [[TMP4:%.*]] = load double, ptr [[X]], align 8
+// CHECK-ILP32E-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], [[TMP4]]
+// CHECK-ILP32E-NEXT: ret double [[ADD]]
//
double f_va_3(char *fmt, ...) {
__builtin_va_list va;
@@ -180,93 +284,269 @@ double f_va_3(char *fmt, ...) {
return v + x;
}
-// CHECK-LABEL: define dso_local i32 @f_va_4
-// CHECK-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[LD:%.*]] = alloca fp128, align 16
-// CHECK-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1
-// CHECK-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4
-// CHECK-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
-// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
-// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
-// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
-// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
-// CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4
-// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4
-// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
-// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 16
-// CHECK-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16
-// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4
-// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
-// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false)
-// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4
-// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 8
-// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false)
-// CHECK-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4
-// CHECK-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4
-// CHECK-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false)
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4
-// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128
-// CHECK-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16
-// CHECK-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]]
-// CHECK-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32
-// CHECK-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4
-// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0
-// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1
-// CHECK-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32
-// CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]]
-// CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1
-// CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1
-// CHECK-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32
-// CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]]
-// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2
-// CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1
-// CHECK-NEXT: [[CONV14:%.*]] = zext i8 [[TMP9]] to i32
-// CHECK-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]]
-// CHECK-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3
-// CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[D]], align 1
-// CHECK-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32
-// CHECK-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]]
-// CHECK-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4
-// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4
-// CHECK-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0
-// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4
-// CHECK-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1
-// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4
-// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32
-// CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]]
-// CHECK-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4
-// CHECK-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0
-// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4
-// CHECK-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4
-// CHECK-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]]
-// CHECK-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2
-// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4
-// CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[TMP18]]
-// CHECK-NEXT: [[D28:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3
-// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4
-// CHECK-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]]
-// CHECK-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4
-// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4
-// CHECK-NEXT: ret i32 [[TMP20]]
+// CHECK-ILP32F-LABEL: define dso_local i32 @f_va_4
+// CHECK-ILP32F-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
+// CHECK-ILP32F-NEXT: entry:
+// CHECK-ILP32F-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4
+// CHECK-ILP32F-NEXT: [[V:%.*]] = alloca i32, align 4
+// CHECK-ILP32F-NEXT: [[LD:%.*]] = alloca fp128, align 16
+// CHECK-ILP32F-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1
+// CHECK-ILP32F-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4
+// CHECK-ILP32F-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
+// CHECK-ILP32F-NEXT: [[RET:%.*]] = alloca i32, align 4
+// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
+// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-ILP32F-NEXT: store i32 [[TMP0]], ptr [[V]], align 4
+// CHECK-ILP32F-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4
+// CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 16
+// CHECK-ILP32F-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16
+// CHECK-ILP32F-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false)
+// CHECK-ILP32F-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 8
+// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false)
+// CHECK-ILP32F-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4
+// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4
+// CHECK-ILP32F-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4
+// CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false)
+// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32F-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4
+// CHECK-ILP32F-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128
+// CHECK-ILP32F-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16
+// CHECK-ILP32F-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]]
+// CHECK-ILP32F-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32
+// CHECK-ILP32F-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4
+// CHECK-ILP32F-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32F-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0
+// CHECK-ILP32F-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-ILP32F-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32
+// CHECK-ILP32F-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]]
+// CHECK-ILP32F-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1
+// CHECK-ILP32F-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1
+// CHECK-ILP32F-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32
+// CHECK-ILP32F-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]]
+// CHECK-ILP32F-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2
+// CHECK-ILP32F-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1
+// CHECK-ILP32F-NEXT: [[CONV14:%.*]] = zext i8 [[TMP9]] to i32
+// CHECK-ILP32F-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]]
+// CHECK-ILP32F-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3
+// CHECK-ILP32F-NEXT: [[TMP10:%.*]] = load i8, ptr [[D]], align 1
+// CHECK-ILP32F-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32
+// CHECK-ILP32F-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]]
+// CHECK-ILP32F-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4
+// CHECK-ILP32F-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32F-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0
+// CHECK-ILP32F-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4
+// CHECK-ILP32F-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK-ILP32F-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1
+// CHECK-ILP32F-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4
+// CHECK-ILP32F-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32
+// CHECK-ILP32F-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]]
+// CHECK-ILP32F-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4
+// CHECK-ILP32F-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32F-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0
+// CHECK-ILP32F-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4
+// CHECK-ILP32F-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
+// CHECK-ILP32F-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1
+// CHECK-ILP32F-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4
+// CHECK-ILP32F-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]]
+// CHECK-ILP32F-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2
+// CHECK-ILP32F-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4
+// CHECK-ILP32F-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[TMP18]]
+// CHECK-ILP32F-NEXT: [[D28:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3
+// CHECK-ILP32F-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4
+// CHECK-ILP32F-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]]
+// CHECK-ILP32F-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4
+// CHECK-ILP32F-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32F-NEXT: ret i32 [[TMP20]]
+//
+// CHECK-ILP32D-LABEL: define dso_local i32 @f_va_4
+// CHECK-ILP32D-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
+// CHECK-ILP32D-NEXT: entry:
+// CHECK-ILP32D-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4
+// CHECK-ILP32D-NEXT: [[V:%.*]] = alloca i32, align 4
+// CHECK-ILP32D-NEXT: [[LD:%.*]] = alloca fp128, align 16
+// CHECK-ILP32D-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1
+// CHECK-ILP32D-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4
+// CHECK-ILP32D-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
+// CHECK-ILP32D-NEXT: [[RET:%.*]] = alloca i32, align 4
+// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
+// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-ILP32D-NEXT: store i32 [[TMP0]], ptr [[V]], align 4
+// CHECK-ILP32D-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4
+// CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 16
+// CHECK-ILP32D-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16
+// CHECK-ILP32D-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false)
+// CHECK-ILP32D-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 8
+// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false)
+// CHECK-ILP32D-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4
+// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4
+// CHECK-ILP32D-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4
+// CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false)
+// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32D-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4
+// CHECK-ILP32D-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128
+// CHECK-ILP32D-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16
+// CHECK-ILP32D-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]]
+// CHECK-ILP32D-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32
+// CHECK-ILP32D-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4
+// CHECK-ILP32D-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32D-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0
+// CHECK-ILP32D-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-ILP32D-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32
+// CHECK-ILP32D-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]]
+// CHECK-ILP32D-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1
+// CHECK-ILP32D-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1
+// CHECK-ILP32D-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32
+// CHECK-ILP32D-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]]
+// CHECK-ILP32D-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2
+// CHECK-ILP32D-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1
+// CHECK-ILP32D-NEXT: [[CONV14:%.*]] = zext i8 [[TMP9]] to i32
+// CHECK-ILP32D-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]]
+// CHECK-ILP32D-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3
+// CHECK-ILP32D-NEXT: [[TMP10:%.*]] = load i8, ptr [[D]], align 1
+// CHECK-ILP32D-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32
+// CHECK-ILP32D-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]]
+// CHECK-ILP32D-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4
+// CHECK-ILP32D-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32D-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0
+// CHECK-ILP32D-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4
+// CHECK-ILP32D-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK-ILP32D-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1
+// CHECK-ILP32D-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4
+// CHECK-ILP32D-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32
+// CHECK-ILP32D-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]]
+// CHECK-ILP32D-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4
+// CHECK-ILP32D-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32D-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0
+// CHECK-ILP32D-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4
+// CHECK-ILP32D-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
+// CHECK-ILP32D-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1
+// CHECK-ILP32D-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4
+// CHECK-ILP32D-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]]
+// CHECK-ILP32D-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2
+// CHECK-ILP32D-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4
+// CHECK-ILP32D-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[TMP18]]
+// CHECK-ILP32D-NEXT: [[D28:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3
+// CHECK-ILP32D-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4
+// CHECK-ILP32D-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]]
+// CHECK-ILP32D-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4
+// CHECK-ILP32D-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32D-NEXT: ret i32 [[TMP20]]
+//
+// CHECK-ILP32E-LABEL: define dso_local i32 @f_va_4
+// CHECK-ILP32E-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] {
+// CHECK-ILP32E-NEXT: entry:
+// CHECK-ILP32E-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4
+// CHECK-ILP32E-NEXT: [[V:%.*]] = alloca i32, align 4
+// CHECK-ILP32E-NEXT: [[LD:%.*]] = alloca fp128, align 16
+// CHECK-ILP32E-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1
+// CHECK-ILP32E-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4
+// CHECK-ILP32E-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
+// CHECK-ILP32E-NEXT: [[RET:%.*]] = alloca i32, align 4
+// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
+// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-ILP32E-NEXT: store i32 [[TMP0]], ptr [[V]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4
+// CHECK-ILP32E-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 4
+// CHECK-ILP32E-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16
+// CHECK-ILP32E-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false)
+// CHECK-ILP32E-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 8
+// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false)
+// CHECK-ILP32E-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4
+// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4
+// CHECK-ILP32E-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4
+// CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false)
+// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32E-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4
+// CHECK-ILP32E-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128
+// CHECK-ILP32E-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16
+// CHECK-ILP32E-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]]
+// CHECK-ILP32E-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32
+// CHECK-ILP32E-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4
+// CHECK-ILP32E-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32E-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0
+// CHECK-ILP32E-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-ILP32E-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32
+// CHECK-ILP32E-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]]
+// CHECK-ILP32E-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1
+// CHECK-ILP32E-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1
+// CHECK-ILP32E-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32
+// CHECK-ILP32E-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]]
+// CHECK-ILP32E-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2
+// CHECK-ILP32E-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1
+// CHECK-ILP32E-NEXT: [[CONV14:%.*]] = zext i8 [[TMP9]] to i32
+// CHECK-ILP32E-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]]
+// CHECK-ILP32E-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3
+// CHECK-ILP32E-NEXT: [[TMP10:%.*]] = load i8, ptr [[D]], align 1
+// CHECK-ILP32E-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32
+// CHECK-ILP32E-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]]
+// CHECK-ILP32E-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4
+// CHECK-ILP32E-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32E-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0
+// CHECK-ILP32E-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4
+// CHECK-ILP32E-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK-ILP32E-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1
+// CHECK-ILP32E-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4
+// CHECK-ILP32E-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32
+// CHECK-ILP32E-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]]
+// CHECK-ILP32E-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4
+// CHECK-ILP32E-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32E-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0
+// CHECK-ILP32E-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4
+// CHECK-ILP32E-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
+// CHECK-ILP32E-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1
+// CHECK-ILP32E-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4
+// CHECK-ILP32E-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]]
+// CHECK-ILP32E-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2
+// CHECK-ILP32E-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4
+// CHECK-ILP32E-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[TMP18]]
+// CHECK-ILP32E-NEXT: [[D28:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3
+// CHECK-ILP32E-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4
+// CHECK-ILP32E-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]]
+// CHECK-ILP32E-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4
+// CHECK-ILP32E-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4
+// CHECK-ILP32E-NEXT: ret i32 [[TMP20]]
//
int f_va_4(char *fmt, ...) {
__builtin_va_list va;
diff --git a/clang/test/CodeGen/RISCV/riscv64-abi.c b/clang/test/CodeGen/RISCV/riscv64-abi.c
index 3e7654851da0efa..021565238904e4a 100644
--- a/clang/test/CodeGen/RISCV/riscv64-abi.c
+++ b/clang/test/CodeGen/RISCV/riscv64-abi.c
@@ -5,6 +5,8 @@
// RUN: | FileCheck -check-prefixes=LP64-LP64F-LP64D,LP64F-LP64D,LP64-LP64F,LP64F %s
// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - \
// RUN: | FileCheck -check-prefixes=LP64-LP64F-LP64D,LP64F-LP64D,LP64D %s
+// RUN: %clang_cc1 -triple riscv64 -emit-llvm -target-abi lp64e %s -o - \
+// RUN: | FileCheck -check-prefixes=LP64-LP64F-LP64D,LP64-LP64F,LP64,LP64E %s
#include <stddef.h>
#include <stdint.h>
@@ -2046,3 +2048,5 @@ union float16_u f_ret_float16_u(void) {
return (union float16_u){1.0};
}
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// LP64E: {{.*}}
diff --git a/clang/test/CodeGen/RISCV/riscv64-vararg.c b/clang/test/CodeGen/RISCV/riscv64-vararg.c
index 26261f0095ac927..634cde61320cb61 100644
--- a/clang/test/CodeGen/RISCV/riscv64-vararg.c
+++ b/clang/test/CodeGen/RISCV/riscv64-vararg.c
@@ -4,6 +4,8 @@
// RUN: | FileCheck %s
// RUN: %clang_cc1 -triple riscv64 -target-feature +d -target-feature +f -target-abi lp64d -emit-llvm %s -o - \
// RUN: | FileCheck %s
+// RUN: %clang_cc1 -triple riscv64 -target-abi lp64e -emit-llvm %s -o - \
+// RUN: | FileCheck %s
#include <stddef.h>
#include <stdint.h>
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index 02d8d34116f804c..dfc24555f404897 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -3,6 +3,8 @@
// RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64i -x c -E -dM %s \
// RUN: -o - | FileCheck %s
+// CHECK-NOT: __riscv_32e {{.*$}}
+// CHECK-NOT: __riscv_64e {{.*$}}
// CHECK-NOT: __riscv_a {{.*$}}
// CHECK-NOT: __riscv_atomic
// CHECK-NOT: __riscv_c {{.*$}}
@@ -170,6 +172,17 @@
// CHECK-D-EXT: __riscv_flen 64
// CHECK-D-EXT: __riscv_fsqrt 1
+// RUN: %clang --target=riscv32-unknown-linux-gnu \
+// RUN: -march=rv32e -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefixes=CHECK-E-EXT,CHECK-RV32E %s
+// RUN: %clang --target=riscv64-unknown-linux-gnu \
+// RUN: -march=rv64e -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefixes=CHECK-E-EXT,CHECK-RV64E %s
+// CHECK-RV32E: __riscv_32e 1
+// CHECK-RV64E: __riscv_64e 1
+// CHECK-E-EXT: __riscv_abi_rve 1
+// CHECK-E-EXT: __riscv_e 2000000{{$}}
+
// RUN: %clang --target=riscv32-unknown-linux-gnu \
// RUN: -march=rv32if -x c -E -dM %s \
// RUN: -o - | FileCheck --check-prefix=CHECK-F-EXT %s
@@ -211,6 +224,15 @@
// CHECK-DOUBLE-NOT: __riscv_float_abi_soft
// CHECK-DOUBLE-NOT: __riscv_float_abi_single
+// RUN: %clang --target=riscv32-unknown-linux-gnu \
+// RUN: -march=rv32i -mabi=ilp32e -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-ILP32E %s
+// RUN: %clang --target=riscv64-unknown-linux-gnu \
+// RUN: -march=rv64i -mabi=lp64e -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-LP64E %s
+// CHECK-ILP32E: __riscv_abi_rve 1
+// CHECK-LP64E: __riscv_abi_rve 1
+
// RUN: %clang --target=riscv32-unknown-linux-gnu \
// RUN: -march=rv32ih -x c -E -dM %s \
// RUN: -o - | FileCheck --check-prefix=CHECK-H-EXT %s
diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index 99c7146825f5ee0..0155d5dbc0e841b 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -88,6 +88,7 @@ on support follow.
``C`` Supported
``D`` Supported
``F`` Supported
+ ``E`` Supported (`See note <#riscv-rve-note>`__)
``H`` Assembly Support
``M`` Supported
``Smaia`` Supported
@@ -179,6 +180,11 @@ Assembly Support
Supported
Fully supported by the compiler. This includes everything in Assembly Support, along with - if relevant - C language intrinsics for the instructions and pattern matching by the compiler to recognize idiomatic patterns which can be lowered to the associated instructions.
+.. _riscv-rve-note:
+
+``E``
+ Support for RV32E/RV64E and the ilp32e/lp64e ABIs is experimental. To be compatible with the implementation of ilp32e in GCC, we don't use aligned registers to pass variadic arguments. Furthermore, we set the stack alignment to 4 bytes for types with length of 2*XLEN.
+
.. _riscv-scalar-crypto-note1:
``Zbkb``, ``Zbkx``
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 9460bc992b0aaed..e02f68f07d93bb9 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -155,6 +155,8 @@ Changes to the RISC-V Backend
needs to work with SiFive to define and document real extension names for
individual CSRs and instructions.
* ``-mcpu=sifive-p450`` was added.
+* CodeGen of RV32E/RV64E is now supported experimentally.
+* CodeGen of the ilp32e/lp64e ABIs is now supported experimentally.
Changes to the WebAssembly Backend
----------------------------------
diff --git a/llvm/include/llvm/Support/RISCVAttributes.h b/llvm/include/llvm/Support/RISCVAttributes.h
index a8ce8f4d8daf452..8643debb78ebc4b 100644
--- a/llvm/include/llvm/Support/RISCVAttributes.h
+++ b/llvm/include/llvm/Support/RISCVAttributes.h
@@ -34,7 +34,7 @@ enum AttrType : unsigned {
PRIV_SPEC_REVISION = 12,
};
-enum StackAlign { ALIGN_4 = 4, ALIGN_16 = 16 };
+enum StackAlign { ALIGN_4 = 4, ALIGN_8 = 8, ALIGN_16 = 16 };
enum { NOT_ALLOWED = 0, ALLOWED = 1 };
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 390d950486a7956..f4b3947e80faeb8 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -1278,20 +1278,20 @@ RISCVISAInfo::postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo) {
StringRef RISCVISAInfo::computeDefaultABI() const {
if (XLen == 32) {
+ if (hasExtension("e"))
+ return "ilp32e";
if (hasExtension("d"))
return "ilp32d";
if (hasExtension("f"))
return "ilp32f";
- if (hasExtension("e"))
- return "ilp32e";
return "ilp32";
} else if (XLen == 64) {
+ if (hasExtension("e"))
+ return "lp64e";
if (hasExtension("d"))
return "lp64d";
if (hasExtension("f"))
return "lp64f";
- if (hasExtension("e"))
- return "lp64e";
return "lp64";
}
llvm_unreachable("Invalid XLEN");
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 5a81c5c7c9f27fb..26eac17ed24c9f3 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -431,7 +431,7 @@ void RISCVCallLowering::saveVarArgRegisters(
MachineFunction &MF = MIRBuilder.getMF();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
unsigned XLenInBytes = Subtarget.getXLen() / 8;
- ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs();
+ ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
MachineFrameInfo &MFI = MF.getFrameInfo();
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 74d0db545e556c2..be9c7d190b55ac7 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -68,6 +68,11 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,
TargetABI = ABI_Unknown;
}
+ if ((TargetABI == RISCVABI::ABI::ABI_ILP32E ||
+ (TargetABI == ABI_Unknown && IsRVE && !IsRV64)) &&
+ FeatureBits[RISCV::FeatureStdExtD])
+ report_fatal_error("ILP32E cannot be used with the D ISA extension");
+
if (TargetABI != ABI_Unknown)
return TargetABI;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 29ffc3224b525b2..ac4861bf113eb2a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -50,11 +50,14 @@ void RISCVTargetStreamer::setTargetABI(RISCVABI::ABI ABI) {
void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI,
bool EmitStackAlign) {
- if (STI.hasFeature(RISCV::FeatureRVE))
- report_fatal_error("Codegen not yet implemented for RVE");
-
- if (EmitStackAlign)
- emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16);
+ if (EmitStackAlign) {
+ if (TargetABI == RISCVABI::ABI_ILP32E)
+ emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_4);
+ else if (TargetABI == RISCVABI::ABI_LP64E)
+ emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_8);
+ else
+ emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16);
+ }
auto ParseResult = RISCVFeatures::parseFeatureBits(
STI.hasFeature(RISCV::Feature64Bit), STI.getFeatureBits());
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td
index 3dd0b3723828656..11b716f20f37166 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.td
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td
@@ -13,8 +13,10 @@
// The RISC-V calling convention is handled with custom code in
// RISCVISelLowering.cpp (CC_RISCV).
+def CSR_ILP32E_LP64E : CalleeSavedRegs<(add X1, X8, X9)>;
+
def CSR_ILP32_LP64
- : CalleeSavedRegs<(add X1, X8, X9, (sequence "X%u", 18, 27))>;
+ : CalleeSavedRegs<(add CSR_ILP32E_LP64E, (sequence "X%u", 18, 27))>;
def CSR_ILP32F_LP64F
: CalleeSavedRegs<(add CSR_ILP32_LP64,
@@ -38,3 +40,15 @@ def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
// Same as CSR_Interrupt, but including all 64-bit FP registers.
def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
(sequence "F%u_D", 0, 31))>;
+
+// Same as CSR_Interrupt, but excluding X16-X31.
+def CSR_Interrupt_RVE : CalleeSavedRegs<(sub CSR_Interrupt,
+ (sequence "X%u", 16, 31))>;
+
+// Same as CSR_XLEN_F32_Interrupt, but excluding X16-X31.
+def CSR_XLEN_F32_Interrupt_RVE: CalleeSavedRegs<(sub CSR_XLEN_F32_Interrupt,
+ (sequence "X%u", 16, 31))>;
+
+// Same as CSR_XLEN_F64_Interrupt, but excluding X16-X31.
+def CSR_XLEN_F64_Interrupt_RVE: CalleeSavedRegs<(sub CSR_XLEN_F64_Interrupt,
+ (sequence "X%u", 16, 31))>;
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index f4af08fcb9b3d98..fa334c69ddc982b 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -17,6 +17,13 @@ def HasStdExtZicsr : Predicate<"Subtarget->hasStdExtZicsr()">,
AssemblerPredicate<(all_of FeatureStdExtZicsr),
"'Zicsr' (CSRs)">;
+def FeatureStdExtI
+ : SubtargetFeature<"i", "HasStdExtI", "true",
+ "'I' (Base Integer Instruction Set)">;
+def HasStdExtI : Predicate<"Subtarget->hasStdExtI()">,
+ AssemblerPredicate<(all_of FeatureStdExtI),
+ "'I' (Base Integer Instruction Set)">;
+
def FeatureStdExtM
: SubtargetFeature<"m", "HasStdExtM", "true",
"'M' (Integer Multiplication and Division)">;
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 8dfea6d3862057c..d793c0b7377baf0 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -27,6 +27,21 @@
using namespace llvm;
+static Align getABIStackAlignment(RISCVABI::ABI ABI) {
+ if (ABI == RISCVABI::ABI_ILP32E)
+ return Align(4);
+ if (ABI == RISCVABI::ABI_LP64E)
+ return Align(8);
+ return Align(16);
+}
+
+RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI)
+ : TargetFrameLowering(StackGrowsDown,
+ getABIStackAlignment(STI.getTargetABI()),
+ /*LocalAreaOffset=*/0,
+ /*TransientStackAlignment=*/Align(16)),
+ STI(STI) {}
+
static const Register AllPopRegs[] = {
RISCV::X1, RISCV::X8, RISCV::X9, RISCV::X18, RISCV::X19,
RISCV::X20, RISCV::X21, RISCV::X22, RISCV::X23, RISCV::X24,
@@ -497,9 +512,11 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// The following calculates the correct offset knowing the number of callee
// saved registers spilt by the two methods.
if (int LibCallRegs = getLibCallID(MF, MFI.getCalleeSavedInfo()) + 1) {
- // Calculate the size of the frame managed by the libcall. The libcalls are
- // implemented such that the stack will always be 16 byte aligned.
- unsigned LibCallFrameSize = alignTo((STI.getXLen() / 8) * LibCallRegs, 16);
+ // Calculate the size of the frame managed by the libcall. The stack
+ // alignment of these libcalls should be the same as how we set it in
+ // getABIStackAlignment.
+ unsigned LibCallFrameSize =
+ alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign());
RVFI->setLibCallStackSize(LibCallFrameSize);
}
@@ -974,6 +991,7 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
// unconditionally save all Caller-saved registers and
// all FP registers, regardless whether they are used.
MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
if (MF.getFunction().hasFnAttribute("interrupt") && MFI.hasCalls()) {
@@ -985,9 +1003,20 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
};
for (auto Reg : CSRegs)
- SavedRegs.set(Reg);
+ // Only save x0-x15 for RVE.
+ if (Reg < RISCV::X16 || !Subtarget.isRVE())
+ SavedRegs.set(Reg);
+
+ // According to psABI, if ilp32e/lp64e ABIs are used with an ISA that
+ // has any of the registers x16-x31 and f0-f31, then these registers are
+ // considered temporaries, so we should also save x16-x31 here.
+ if (STI.getTargetABI() == RISCVABI::ABI_ILP32E ||
+ STI.getTargetABI() == RISCVABI::ABI_LP64E) {
+ for (MCPhysReg Reg = RISCV::X16; Reg <= RISCV::X31; Reg++)
+ SavedRegs.set(Reg);
+ }
- if (MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) {
+ if (Subtarget.hasStdExtF()) {
// If interrupt is enabled, this list contains all FP registers.
const MCPhysReg * Regs = MF.getRegInfo().getCalleeSavedRegs();
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 9bc100981f2f7b9..5c1c7317d24bc6a 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -21,12 +21,7 @@ class RISCVSubtarget;
class RISCVFrameLowering : public TargetFrameLowering {
public:
- explicit RISCVFrameLowering(const RISCVSubtarget &STI)
- : TargetFrameLowering(StackGrowsDown,
- /*StackAlignment=*/Align(16),
- /*LocalAreaOffset=*/0,
- /*TransientStackAlignment=*/Align(16)),
- STI(STI) {}
+ explicit RISCVFrameLowering(const RISCVSubtarget &STI);
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2c93daa4a2c375e..b09afab15d11b8f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -83,9 +83,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
- if (Subtarget.isRVE())
- report_fatal_error("Codegen not yet implemented for RVE");
-
RISCVABI::ABI ABI = Subtarget.getTargetABI();
assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
@@ -107,6 +104,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
default:
report_fatal_error("Don't know how to lower this ABI");
case RISCVABI::ABI_ILP32:
+ case RISCVABI::ABI_ILP32E:
+ case RISCVABI::ABI_LP64E:
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64:
@@ -17061,12 +17060,39 @@ static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
-ArrayRef<MCPhysReg> RISCV::getArgGPRs() {
- static const MCPhysReg ArgGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
- RISCV::X13, RISCV::X14, RISCV::X15,
- RISCV::X16, RISCV::X17};
+ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
+ // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
+ // the ILP32E and LP64E ABIs.
+ static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
+ RISCV::X13, RISCV::X14, RISCV::X15,
+ RISCV::X16, RISCV::X17};
+ // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
+ static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
+ RISCV::X13, RISCV::X14, RISCV::X15};
+
+ if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+ return ArrayRef(ArgEGPRs);
+
+ return ArrayRef(ArgIGPRs);
+}
+
+static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
+ // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
+ // for save-restore libcall, so we don't use them.
+ static const MCPhysReg FastCCIGPRs[] = {
+ RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
+ RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
+ RISCV::X29, RISCV::X30, RISCV::X31};
+
+ // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
+ static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
+ RISCV::X13, RISCV::X14, RISCV::X15,
+ RISCV::X7};
- return ArrayRef(ArgGPRs);
+ if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+ return ArrayRef(FastCCEGPRs);
+
+ return ArrayRef(FastCCIGPRs);
}
// Pass a 2*XLEN argument that has been split into two XLEN values through
@@ -17074,17 +17100,23 @@ ArrayRef<MCPhysReg> RISCV::getArgGPRs() {
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
MVT ValVT2, MVT LocVT2,
- ISD::ArgFlagsTy ArgFlags2) {
+ ISD::ArgFlagsTy ArgFlags2, bool EABI) {
unsigned XLenInBytes = XLen / 8;
- ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
+ const RISCVSubtarget &STI =
+ State.getMachineFunction().getSubtarget<RISCVSubtarget>();
+ ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI());
+
if (Register Reg = State.AllocateReg(ArgGPRs)) {
// At least one half can be passed via register.
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
VA1.getLocVT(), CCValAssign::Full));
} else {
// Both halves must be passed on the stack, with proper alignment.
- Align StackAlign =
- std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
+ // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
+ // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
+ Align StackAlign(XLenInBytes);
+ if (!EABI || XLen != 32)
+ StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
State.addLoc(
CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
State.AllocateStack(XLenInBytes, StackAlign),
@@ -17165,7 +17197,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
default:
llvm_unreachable("Unexpected ABI");
case RISCVABI::ABI_ILP32:
+ case RISCVABI::ABI_ILP32E:
case RISCVABI::ABI_LP64:
+ case RISCVABI::ABI_LP64E:
break;
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_LP64F:
@@ -17197,7 +17231,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
LocInfo = CCValAssign::BCvt;
}
- ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
+ ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
// If this is a variadic argument, the RISC-V calling convention requires
// that it is assigned an 'even' or 'aligned' register if it has 8-byte
@@ -17206,9 +17240,13 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
// legalisation or not. The argument will not be passed by registers if the
// original type is larger than 2*XLEN, so the register alignment rule does
// not apply.
+ // TODO: To be compatible with GCC's behaviors, we don't align registers
+ // currently if we are using ILP32E calling convention. This behavior may be
+ // changed when RV32E/ILP32E is ratified.
unsigned TwoXLenInBytes = (2 * XLen) / 8;
if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
- DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
+ DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
+ ABI != RISCVABI::ABI_ILP32E) {
unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
// Skip 'odd' register if necessary.
if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
@@ -17281,8 +17319,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
ISD::ArgFlagsTy AF = PendingArgFlags[0];
PendingLocs.clear();
PendingArgFlags.clear();
- return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
- ArgFlags);
+ return CC_RISCVAssign2XLen(
+ XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
+ ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
}
// Allocate to a register if possible, or else a stack slot.
@@ -17608,15 +17647,8 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
std::optional<unsigned> FirstMaskArgument) {
-
- // X5 and X6 might be used for save-restore libcall.
- static const MCPhysReg GPRList[] = {
- RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
- RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
- RISCV::X29, RISCV::X30, RISCV::X31};
-
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
- if (unsigned Reg = State.AllocateReg(GPRList)) {
+ if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
@@ -17667,7 +17699,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
(LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
(LocVT == MVT::f64 && Subtarget.is64Bit() &&
Subtarget.hasStdExtZdinx())) {
- if (unsigned Reg = State.AllocateReg(GPRList)) {
+ if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
@@ -17701,7 +17733,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
} else {
// Try and pass the address via a "fast" GPR.
- if (unsigned GPRReg = State.AllocateReg(GPRList)) {
+ if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
LocInfo = CCValAssign::Indirect;
LocVT = TLI.getSubtarget().getXLenVT();
State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
@@ -17802,6 +17834,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
case CallingConv::GRAAL:
break;
case CallingConv::GHC:
+ if (Subtarget.isRVE())
+ report_fatal_error("GHC calling convention is not supported on RVE!");
if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
"(Zdinx/D) instruction set extensions");
@@ -17884,7 +17918,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
if (IsVarArg) {
- ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs();
+ ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -18037,9 +18071,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<CCValAssign, 16> ArgLocs;
CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
- if (CallConv == CallingConv::GHC)
+ if (CallConv == CallingConv::GHC) {
+ if (Subtarget.isRVE())
+ report_fatal_error("GHC calling convention is not supported on RVE!");
ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
- else
+ } else
analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
: RISCV::CC_RISCV);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index c65953e37b17105..a55a6046166719c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -995,7 +995,7 @@ bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State);
-ArrayRef<MCPhysReg> getArgGPRs();
+ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI);
} // end namespace RISCV
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 08e83c71c82defc..730838ea004aa2a 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -63,13 +63,18 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.hasStdExtD())
return CSR_XLEN_F64_Interrupt_SaveList;
if (Subtarget.hasStdExtF())
- return CSR_XLEN_F32_Interrupt_SaveList;
- return CSR_Interrupt_SaveList;
+ return Subtarget.isRVE() ? CSR_XLEN_F32_Interrupt_RVE_SaveList
+ : CSR_XLEN_F32_Interrupt_SaveList;
+ return Subtarget.isRVE() ? CSR_Interrupt_RVE_SaveList
+ : CSR_Interrupt_SaveList;
}
switch (Subtarget.getTargetABI()) {
default:
llvm_unreachable("Unrecognized ABI");
+ case RISCVABI::ABI_ILP32E:
+ case RISCVABI::ABI_LP64E:
+ return CSR_ILP32E_LP64E_SaveList;
case RISCVABI::ABI_ILP32:
case RISCVABI::ABI_LP64:
return CSR_ILP32_LP64_SaveList;
@@ -109,6 +114,11 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// beginning with 'x0' for instructions that take register pairs.
markSuperRegs(Reserved, RISCV::DUMMY_REG_PAIR_WITH_X0);
+ // There are only 16 GPRs for RVE.
+ if (Subtarget.isRVE())
+ for (MCPhysReg Reg = RISCV::X16; Reg <= RISCV::X31; Reg++)
+ markSuperRegs(Reserved, Reg);
+
// V registers for code generation. We handle them manually.
markSuperRegs(Reserved, RISCV::VL);
markSuperRegs(Reserved, RISCV::VTYPE);
@@ -673,6 +683,9 @@ RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & MF,
switch (Subtarget.getTargetABI()) {
default:
llvm_unreachable("Unrecognized ABI");
+ case RISCVABI::ABI_ILP32E:
+ case RISCVABI::ABI_LP64E:
+ return CSR_ILP32E_LP64E_RegMask;
case RISCVABI::ABI_ILP32:
case RISCVABI::ABI_LP64:
return CSR_ILP32_LP64_RegMask;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index ab316f185b0d263..b4b81b545a54bbd 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -128,10 +128,20 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVPushPopOptPass(*PR);
}
-static StringRef computeDataLayout(const Triple &TT) {
- if (TT.isArch64Bit())
+static StringRef computeDataLayout(const Triple &TT,
+ const TargetOptions &Options) {
+ StringRef ABIName = Options.MCOptions.getABIName();
+ if (TT.isArch64Bit()) {
+ if (ABIName == "lp64e")
+ return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S64";
+
return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+ }
assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
+
+ if (ABIName == "ilp32e")
+ return "e-m:e-p:32:32-i64:64-n32-S32";
+
return "e-m:e-p:32:32-i64:64-n32-S128";
}
@@ -146,7 +156,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
CodeGenOptLevel OL, bool JIT)
- : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
+ : LLVMTargetMachine(T, computeDataLayout(TT, Options), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(std::make_unique<RISCVELFTargetObjectFile>()) {
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
index 79e80dac8241fbb..2122b3fd91788b9 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=ILP32
+; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=ILP32E
; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=LP64
+; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=LP64E
; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=ILP32F
; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < %s \
@@ -14,8 +18,8 @@
@var = global [32 x float] zeroinitializer
-; All floating point registers are temporaries for the ilp32 and lp64 ABIs.
-; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs.
+; All floating point registers are temporaries for the ilp32, ilp32e, lp64e and lp64
+; ABIs. fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs.
; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns
; something appropriate.
@@ -91,6 +95,76 @@ define void @callee() nounwind {
; ILP32-NEXT: fsw fa5, %lo(var)(a0)
; ILP32-NEXT: ret
;
+; ILP32E-LABEL: callee:
+; ILP32E: # %bb.0:
+; ILP32E-NEXT: lui a0, %hi(var)
+; ILP32E-NEXT: flw fa5, %lo(var)(a0)
+; ILP32E-NEXT: flw fa4, %lo(var+4)(a0)
+; ILP32E-NEXT: flw fa3, %lo(var+8)(a0)
+; ILP32E-NEXT: flw fa2, %lo(var+12)(a0)
+; ILP32E-NEXT: addi a1, a0, %lo(var)
+; ILP32E-NEXT: flw fa1, 16(a1)
+; ILP32E-NEXT: flw fa0, 20(a1)
+; ILP32E-NEXT: flw ft0, 24(a1)
+; ILP32E-NEXT: flw ft1, 28(a1)
+; ILP32E-NEXT: flw ft2, 32(a1)
+; ILP32E-NEXT: flw ft3, 36(a1)
+; ILP32E-NEXT: flw ft4, 40(a1)
+; ILP32E-NEXT: flw ft5, 44(a1)
+; ILP32E-NEXT: flw ft6, 48(a1)
+; ILP32E-NEXT: flw ft7, 52(a1)
+; ILP32E-NEXT: flw fa6, 56(a1)
+; ILP32E-NEXT: flw fa7, 60(a1)
+; ILP32E-NEXT: flw ft8, 64(a1)
+; ILP32E-NEXT: flw ft9, 68(a1)
+; ILP32E-NEXT: flw ft10, 72(a1)
+; ILP32E-NEXT: flw ft11, 76(a1)
+; ILP32E-NEXT: flw fs0, 80(a1)
+; ILP32E-NEXT: flw fs1, 84(a1)
+; ILP32E-NEXT: flw fs2, 88(a1)
+; ILP32E-NEXT: flw fs3, 92(a1)
+; ILP32E-NEXT: flw fs4, 96(a1)
+; ILP32E-NEXT: flw fs5, 100(a1)
+; ILP32E-NEXT: flw fs6, 104(a1)
+; ILP32E-NEXT: flw fs7, 108(a1)
+; ILP32E-NEXT: flw fs8, 124(a1)
+; ILP32E-NEXT: flw fs9, 120(a1)
+; ILP32E-NEXT: flw fs10, 116(a1)
+; ILP32E-NEXT: flw fs11, 112(a1)
+; ILP32E-NEXT: fsw fs8, 124(a1)
+; ILP32E-NEXT: fsw fs9, 120(a1)
+; ILP32E-NEXT: fsw fs10, 116(a1)
+; ILP32E-NEXT: fsw fs11, 112(a1)
+; ILP32E-NEXT: fsw fs7, 108(a1)
+; ILP32E-NEXT: fsw fs6, 104(a1)
+; ILP32E-NEXT: fsw fs5, 100(a1)
+; ILP32E-NEXT: fsw fs4, 96(a1)
+; ILP32E-NEXT: fsw fs3, 92(a1)
+; ILP32E-NEXT: fsw fs2, 88(a1)
+; ILP32E-NEXT: fsw fs1, 84(a1)
+; ILP32E-NEXT: fsw fs0, 80(a1)
+; ILP32E-NEXT: fsw ft11, 76(a1)
+; ILP32E-NEXT: fsw ft10, 72(a1)
+; ILP32E-NEXT: fsw ft9, 68(a1)
+; ILP32E-NEXT: fsw ft8, 64(a1)
+; ILP32E-NEXT: fsw fa7, 60(a1)
+; ILP32E-NEXT: fsw fa6, 56(a1)
+; ILP32E-NEXT: fsw ft7, 52(a1)
+; ILP32E-NEXT: fsw ft6, 48(a1)
+; ILP32E-NEXT: fsw ft5, 44(a1)
+; ILP32E-NEXT: fsw ft4, 40(a1)
+; ILP32E-NEXT: fsw ft3, 36(a1)
+; ILP32E-NEXT: fsw ft2, 32(a1)
+; ILP32E-NEXT: fsw ft1, 28(a1)
+; ILP32E-NEXT: fsw ft0, 24(a1)
+; ILP32E-NEXT: fsw fa0, 20(a1)
+; ILP32E-NEXT: fsw fa1, 16(a1)
+; ILP32E-NEXT: fsw fa2, %lo(var+12)(a0)
+; ILP32E-NEXT: fsw fa3, %lo(var+8)(a0)
+; ILP32E-NEXT: fsw fa4, %lo(var+4)(a0)
+; ILP32E-NEXT: fsw fa5, %lo(var)(a0)
+; ILP32E-NEXT: ret
+;
; LP64-LABEL: callee:
; LP64: # %bb.0:
; LP64-NEXT: lui a0, %hi(var)
@@ -161,6 +235,76 @@ define void @callee() nounwind {
; LP64-NEXT: fsw fa5, %lo(var)(a0)
; LP64-NEXT: ret
;
+; LP64E-LABEL: callee:
+; LP64E: # %bb.0:
+; LP64E-NEXT: lui a0, %hi(var)
+; LP64E-NEXT: flw fa5, %lo(var)(a0)
+; LP64E-NEXT: flw fa4, %lo(var+4)(a0)
+; LP64E-NEXT: flw fa3, %lo(var+8)(a0)
+; LP64E-NEXT: flw fa2, %lo(var+12)(a0)
+; LP64E-NEXT: addi a1, a0, %lo(var)
+; LP64E-NEXT: flw fa1, 16(a1)
+; LP64E-NEXT: flw fa0, 20(a1)
+; LP64E-NEXT: flw ft0, 24(a1)
+; LP64E-NEXT: flw ft1, 28(a1)
+; LP64E-NEXT: flw ft2, 32(a1)
+; LP64E-NEXT: flw ft3, 36(a1)
+; LP64E-NEXT: flw ft4, 40(a1)
+; LP64E-NEXT: flw ft5, 44(a1)
+; LP64E-NEXT: flw ft6, 48(a1)
+; LP64E-NEXT: flw ft7, 52(a1)
+; LP64E-NEXT: flw fa6, 56(a1)
+; LP64E-NEXT: flw fa7, 60(a1)
+; LP64E-NEXT: flw ft8, 64(a1)
+; LP64E-NEXT: flw ft9, 68(a1)
+; LP64E-NEXT: flw ft10, 72(a1)
+; LP64E-NEXT: flw ft11, 76(a1)
+; LP64E-NEXT: flw fs0, 80(a1)
+; LP64E-NEXT: flw fs1, 84(a1)
+; LP64E-NEXT: flw fs2, 88(a1)
+; LP64E-NEXT: flw fs3, 92(a1)
+; LP64E-NEXT: flw fs4, 96(a1)
+; LP64E-NEXT: flw fs5, 100(a1)
+; LP64E-NEXT: flw fs6, 104(a1)
+; LP64E-NEXT: flw fs7, 108(a1)
+; LP64E-NEXT: flw fs8, 124(a1)
+; LP64E-NEXT: flw fs9, 120(a1)
+; LP64E-NEXT: flw fs10, 116(a1)
+; LP64E-NEXT: flw fs11, 112(a1)
+; LP64E-NEXT: fsw fs8, 124(a1)
+; LP64E-NEXT: fsw fs9, 120(a1)
+; LP64E-NEXT: fsw fs10, 116(a1)
+; LP64E-NEXT: fsw fs11, 112(a1)
+; LP64E-NEXT: fsw fs7, 108(a1)
+; LP64E-NEXT: fsw fs6, 104(a1)
+; LP64E-NEXT: fsw fs5, 100(a1)
+; LP64E-NEXT: fsw fs4, 96(a1)
+; LP64E-NEXT: fsw fs3, 92(a1)
+; LP64E-NEXT: fsw fs2, 88(a1)
+; LP64E-NEXT: fsw fs1, 84(a1)
+; LP64E-NEXT: fsw fs0, 80(a1)
+; LP64E-NEXT: fsw ft11, 76(a1)
+; LP64E-NEXT: fsw ft10, 72(a1)
+; LP64E-NEXT: fsw ft9, 68(a1)
+; LP64E-NEXT: fsw ft8, 64(a1)
+; LP64E-NEXT: fsw fa7, 60(a1)
+; LP64E-NEXT: fsw fa6, 56(a1)
+; LP64E-NEXT: fsw ft7, 52(a1)
+; LP64E-NEXT: fsw ft6, 48(a1)
+; LP64E-NEXT: fsw ft5, 44(a1)
+; LP64E-NEXT: fsw ft4, 40(a1)
+; LP64E-NEXT: fsw ft3, 36(a1)
+; LP64E-NEXT: fsw ft2, 32(a1)
+; LP64E-NEXT: fsw ft1, 28(a1)
+; LP64E-NEXT: fsw ft0, 24(a1)
+; LP64E-NEXT: fsw fa0, 20(a1)
+; LP64E-NEXT: fsw fa1, 16(a1)
+; LP64E-NEXT: fsw fa2, %lo(var+12)(a0)
+; LP64E-NEXT: fsw fa3, %lo(var+8)(a0)
+; LP64E-NEXT: fsw fa4, %lo(var+4)(a0)
+; LP64E-NEXT: fsw fa5, %lo(var)(a0)
+; LP64E-NEXT: ret
+;
; ILP32F-LABEL: callee:
; ILP32F: # %bb.0:
; ILP32F-NEXT: addi sp, sp, -48
@@ -700,6 +844,149 @@ define void @caller() nounwind {
; ILP32-NEXT: addi sp, sp, 144
; ILP32-NEXT: ret
;
+; ILP32E-LABEL: caller:
+; ILP32E: # %bb.0:
+; ILP32E-NEXT: addi sp, sp, -140
+; ILP32E-NEXT: sw ra, 136(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: sw s0, 132(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: sw s1, 128(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: lui s0, %hi(var)
+; ILP32E-NEXT: flw fa5, %lo(var)(s0)
+; ILP32E-NEXT: fsw fa5, 124(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, %lo(var+4)(s0)
+; ILP32E-NEXT: fsw fa5, 120(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, %lo(var+8)(s0)
+; ILP32E-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, %lo(var+12)(s0)
+; ILP32E-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: addi s1, s0, %lo(var)
+; ILP32E-NEXT: flw fa5, 16(s1)
+; ILP32E-NEXT: fsw fa5, 108(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 20(s1)
+; ILP32E-NEXT: fsw fa5, 104(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 24(s1)
+; ILP32E-NEXT: fsw fa5, 100(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 28(s1)
+; ILP32E-NEXT: fsw fa5, 96(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 32(s1)
+; ILP32E-NEXT: fsw fa5, 92(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 36(s1)
+; ILP32E-NEXT: fsw fa5, 88(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 40(s1)
+; ILP32E-NEXT: fsw fa5, 84(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 44(s1)
+; ILP32E-NEXT: fsw fa5, 80(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 48(s1)
+; ILP32E-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 52(s1)
+; ILP32E-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 56(s1)
+; ILP32E-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 60(s1)
+; ILP32E-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 64(s1)
+; ILP32E-NEXT: fsw fa5, 60(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 68(s1)
+; ILP32E-NEXT: fsw fa5, 56(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 72(s1)
+; ILP32E-NEXT: fsw fa5, 52(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 76(s1)
+; ILP32E-NEXT: fsw fa5, 48(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 80(s1)
+; ILP32E-NEXT: fsw fa5, 44(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 84(s1)
+; ILP32E-NEXT: fsw fa5, 40(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 88(s1)
+; ILP32E-NEXT: fsw fa5, 36(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 92(s1)
+; ILP32E-NEXT: fsw fa5, 32(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 96(s1)
+; ILP32E-NEXT: fsw fa5, 28(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 100(s1)
+; ILP32E-NEXT: fsw fa5, 24(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 104(s1)
+; ILP32E-NEXT: fsw fa5, 20(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 108(s1)
+; ILP32E-NEXT: fsw fa5, 16(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 112(s1)
+; ILP32E-NEXT: fsw fa5, 12(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 116(s1)
+; ILP32E-NEXT: fsw fa5, 8(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 120(s1)
+; ILP32E-NEXT: fsw fa5, 4(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: flw fa5, 124(s1)
+; ILP32E-NEXT: fsw fa5, 0(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: call callee
+; ILP32E-NEXT: flw fa5, 0(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 124(s1)
+; ILP32E-NEXT: flw fa5, 4(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 120(s1)
+; ILP32E-NEXT: flw fa5, 8(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 116(s1)
+; ILP32E-NEXT: flw fa5, 12(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 112(s1)
+; ILP32E-NEXT: flw fa5, 16(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 108(s1)
+; ILP32E-NEXT: flw fa5, 20(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 104(s1)
+; ILP32E-NEXT: flw fa5, 24(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 100(s1)
+; ILP32E-NEXT: flw fa5, 28(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 96(s1)
+; ILP32E-NEXT: flw fa5, 32(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 92(s1)
+; ILP32E-NEXT: flw fa5, 36(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 88(s1)
+; ILP32E-NEXT: flw fa5, 40(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 84(s1)
+; ILP32E-NEXT: flw fa5, 44(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 80(s1)
+; ILP32E-NEXT: flw fa5, 48(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 76(s1)
+; ILP32E-NEXT: flw fa5, 52(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 72(s1)
+; ILP32E-NEXT: flw fa5, 56(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 68(s1)
+; ILP32E-NEXT: flw fa5, 60(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 64(s1)
+; ILP32E-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 60(s1)
+; ILP32E-NEXT: flw fa5, 68(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 56(s1)
+; ILP32E-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 52(s1)
+; ILP32E-NEXT: flw fa5, 76(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 48(s1)
+; ILP32E-NEXT: flw fa5, 80(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 44(s1)
+; ILP32E-NEXT: flw fa5, 84(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 40(s1)
+; ILP32E-NEXT: flw fa5, 88(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 36(s1)
+; ILP32E-NEXT: flw fa5, 92(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 32(s1)
+; ILP32E-NEXT: flw fa5, 96(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 28(s1)
+; ILP32E-NEXT: flw fa5, 100(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 24(s1)
+; ILP32E-NEXT: flw fa5, 104(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 20(s1)
+; ILP32E-NEXT: flw fa5, 108(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, 16(s1)
+; ILP32E-NEXT: flw fa5, 112(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, %lo(var+12)(s0)
+; ILP32E-NEXT: flw fa5, 116(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, %lo(var+8)(s0)
+; ILP32E-NEXT: flw fa5, 120(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, %lo(var+4)(s0)
+; ILP32E-NEXT: flw fa5, 124(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: fsw fa5, %lo(var)(s0)
+; ILP32E-NEXT: lw ra, 136(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: lw s0, 132(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: lw s1, 128(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: addi sp, sp, 140
+; ILP32E-NEXT: ret
+;
; LP64-LABEL: caller:
; LP64: # %bb.0:
; LP64-NEXT: addi sp, sp, -160
@@ -843,6 +1130,149 @@ define void @caller() nounwind {
; LP64-NEXT: addi sp, sp, 160
; LP64-NEXT: ret
;
+; LP64E-LABEL: caller:
+; LP64E: # %bb.0:
+; LP64E-NEXT: addi sp, sp, -152
+; LP64E-NEXT: sd ra, 144(sp) # 8-byte Folded Spill
+; LP64E-NEXT: sd s0, 136(sp) # 8-byte Folded Spill
+; LP64E-NEXT: sd s1, 128(sp) # 8-byte Folded Spill
+; LP64E-NEXT: lui s0, %hi(var)
+; LP64E-NEXT: flw fa5, %lo(var)(s0)
+; LP64E-NEXT: fsw fa5, 124(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, %lo(var+4)(s0)
+; LP64E-NEXT: fsw fa5, 120(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, %lo(var+8)(s0)
+; LP64E-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, %lo(var+12)(s0)
+; LP64E-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill
+; LP64E-NEXT: addi s1, s0, %lo(var)
+; LP64E-NEXT: flw fa5, 16(s1)
+; LP64E-NEXT: fsw fa5, 108(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 20(s1)
+; LP64E-NEXT: fsw fa5, 104(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 24(s1)
+; LP64E-NEXT: fsw fa5, 100(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 28(s1)
+; LP64E-NEXT: fsw fa5, 96(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 32(s1)
+; LP64E-NEXT: fsw fa5, 92(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 36(s1)
+; LP64E-NEXT: fsw fa5, 88(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 40(s1)
+; LP64E-NEXT: fsw fa5, 84(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 44(s1)
+; LP64E-NEXT: fsw fa5, 80(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 48(s1)
+; LP64E-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 52(s1)
+; LP64E-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 56(s1)
+; LP64E-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 60(s1)
+; LP64E-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 64(s1)
+; LP64E-NEXT: fsw fa5, 60(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 68(s1)
+; LP64E-NEXT: fsw fa5, 56(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 72(s1)
+; LP64E-NEXT: fsw fa5, 52(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 76(s1)
+; LP64E-NEXT: fsw fa5, 48(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 80(s1)
+; LP64E-NEXT: fsw fa5, 44(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 84(s1)
+; LP64E-NEXT: fsw fa5, 40(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 88(s1)
+; LP64E-NEXT: fsw fa5, 36(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 92(s1)
+; LP64E-NEXT: fsw fa5, 32(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 96(s1)
+; LP64E-NEXT: fsw fa5, 28(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 100(s1)
+; LP64E-NEXT: fsw fa5, 24(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 104(s1)
+; LP64E-NEXT: fsw fa5, 20(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 108(s1)
+; LP64E-NEXT: fsw fa5, 16(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 112(s1)
+; LP64E-NEXT: fsw fa5, 12(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 116(s1)
+; LP64E-NEXT: fsw fa5, 8(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 120(s1)
+; LP64E-NEXT: fsw fa5, 4(sp) # 4-byte Folded Spill
+; LP64E-NEXT: flw fa5, 124(s1)
+; LP64E-NEXT: fsw fa5, 0(sp) # 4-byte Folded Spill
+; LP64E-NEXT: call callee
+; LP64E-NEXT: flw fa5, 0(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 124(s1)
+; LP64E-NEXT: flw fa5, 4(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 120(s1)
+; LP64E-NEXT: flw fa5, 8(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 116(s1)
+; LP64E-NEXT: flw fa5, 12(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 112(s1)
+; LP64E-NEXT: flw fa5, 16(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 108(s1)
+; LP64E-NEXT: flw fa5, 20(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 104(s1)
+; LP64E-NEXT: flw fa5, 24(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 100(s1)
+; LP64E-NEXT: flw fa5, 28(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 96(s1)
+; LP64E-NEXT: flw fa5, 32(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 92(s1)
+; LP64E-NEXT: flw fa5, 36(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 88(s1)
+; LP64E-NEXT: flw fa5, 40(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 84(s1)
+; LP64E-NEXT: flw fa5, 44(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 80(s1)
+; LP64E-NEXT: flw fa5, 48(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 76(s1)
+; LP64E-NEXT: flw fa5, 52(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 72(s1)
+; LP64E-NEXT: flw fa5, 56(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 68(s1)
+; LP64E-NEXT: flw fa5, 60(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 64(s1)
+; LP64E-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 60(s1)
+; LP64E-NEXT: flw fa5, 68(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 56(s1)
+; LP64E-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 52(s1)
+; LP64E-NEXT: flw fa5, 76(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 48(s1)
+; LP64E-NEXT: flw fa5, 80(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 44(s1)
+; LP64E-NEXT: flw fa5, 84(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 40(s1)
+; LP64E-NEXT: flw fa5, 88(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 36(s1)
+; LP64E-NEXT: flw fa5, 92(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 32(s1)
+; LP64E-NEXT: flw fa5, 96(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 28(s1)
+; LP64E-NEXT: flw fa5, 100(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 24(s1)
+; LP64E-NEXT: flw fa5, 104(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 20(s1)
+; LP64E-NEXT: flw fa5, 108(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, 16(s1)
+; LP64E-NEXT: flw fa5, 112(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, %lo(var+12)(s0)
+; LP64E-NEXT: flw fa5, 116(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, %lo(var+8)(s0)
+; LP64E-NEXT: flw fa5, 120(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, %lo(var+4)(s0)
+; LP64E-NEXT: flw fa5, 124(sp) # 4-byte Folded Reload
+; LP64E-NEXT: fsw fa5, %lo(var)(s0)
+; LP64E-NEXT: ld ra, 144(sp) # 8-byte Folded Reload
+; LP64E-NEXT: ld s0, 136(sp) # 8-byte Folded Reload
+; LP64E-NEXT: ld s1, 128(sp) # 8-byte Folded Reload
+; LP64E-NEXT: addi sp, sp, 152
+; LP64E-NEXT: ret
+;
; ILP32F-LABEL: caller:
; ILP32F: # %bb.0:
; ILP32F-NEXT: addi sp, sp, -144
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
index abfa26e8a4f2519..38e3c2d9256cdfa 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck %s -check-prefix=ILP32
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs< %s \
; RUN: | FileCheck %s -check-prefix=LP64
+; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64e -verify-machineinstrs< %s \
+; RUN: | FileCheck %s -check-prefix=LP64E
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=ILP32D
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64d -verify-machineinstrs < %s \
@@ -10,7 +12,7 @@
@var = global [32 x double] zeroinitializer
-; All floating point registers are temporaries for the ilp32 and lp64 ABIs.
+; All floating point registers are temporaries for the ilp32, lp64e and lp64 ABIs.
; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs.
; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns
@@ -157,6 +159,76 @@ define void @callee() nounwind {
; LP64-NEXT: fsd fa5, %lo(var)(a0)
; LP64-NEXT: ret
;
+; LP64E-LABEL: callee:
+; LP64E: # %bb.0:
+; LP64E-NEXT: lui a0, %hi(var)
+; LP64E-NEXT: fld fa5, %lo(var)(a0)
+; LP64E-NEXT: fld fa4, %lo(var+8)(a0)
+; LP64E-NEXT: addi a1, a0, %lo(var)
+; LP64E-NEXT: fld fa3, 16(a1)
+; LP64E-NEXT: fld fa2, 24(a1)
+; LP64E-NEXT: fld fa1, 32(a1)
+; LP64E-NEXT: fld fa0, 40(a1)
+; LP64E-NEXT: fld ft0, 48(a1)
+; LP64E-NEXT: fld ft1, 56(a1)
+; LP64E-NEXT: fld ft2, 64(a1)
+; LP64E-NEXT: fld ft3, 72(a1)
+; LP64E-NEXT: fld ft4, 80(a1)
+; LP64E-NEXT: fld ft5, 88(a1)
+; LP64E-NEXT: fld ft6, 96(a1)
+; LP64E-NEXT: fld ft7, 104(a1)
+; LP64E-NEXT: fld fa6, 112(a1)
+; LP64E-NEXT: fld fa7, 120(a1)
+; LP64E-NEXT: fld ft8, 128(a1)
+; LP64E-NEXT: fld ft9, 136(a1)
+; LP64E-NEXT: fld ft10, 144(a1)
+; LP64E-NEXT: fld ft11, 152(a1)
+; LP64E-NEXT: fld fs0, 160(a1)
+; LP64E-NEXT: fld fs1, 168(a1)
+; LP64E-NEXT: fld fs2, 176(a1)
+; LP64E-NEXT: fld fs3, 184(a1)
+; LP64E-NEXT: fld fs4, 192(a1)
+; LP64E-NEXT: fld fs5, 200(a1)
+; LP64E-NEXT: fld fs6, 208(a1)
+; LP64E-NEXT: fld fs7, 216(a1)
+; LP64E-NEXT: fld fs8, 248(a1)
+; LP64E-NEXT: fld fs9, 240(a1)
+; LP64E-NEXT: fld fs10, 232(a1)
+; LP64E-NEXT: fld fs11, 224(a1)
+; LP64E-NEXT: fsd fs8, 248(a1)
+; LP64E-NEXT: fsd fs9, 240(a1)
+; LP64E-NEXT: fsd fs10, 232(a1)
+; LP64E-NEXT: fsd fs11, 224(a1)
+; LP64E-NEXT: fsd fs7, 216(a1)
+; LP64E-NEXT: fsd fs6, 208(a1)
+; LP64E-NEXT: fsd fs5, 200(a1)
+; LP64E-NEXT: fsd fs4, 192(a1)
+; LP64E-NEXT: fsd fs3, 184(a1)
+; LP64E-NEXT: fsd fs2, 176(a1)
+; LP64E-NEXT: fsd fs1, 168(a1)
+; LP64E-NEXT: fsd fs0, 160(a1)
+; LP64E-NEXT: fsd ft11, 152(a1)
+; LP64E-NEXT: fsd ft10, 144(a1)
+; LP64E-NEXT: fsd ft9, 136(a1)
+; LP64E-NEXT: fsd ft8, 128(a1)
+; LP64E-NEXT: fsd fa7, 120(a1)
+; LP64E-NEXT: fsd fa6, 112(a1)
+; LP64E-NEXT: fsd ft7, 104(a1)
+; LP64E-NEXT: fsd ft6, 96(a1)
+; LP64E-NEXT: fsd ft5, 88(a1)
+; LP64E-NEXT: fsd ft4, 80(a1)
+; LP64E-NEXT: fsd ft3, 72(a1)
+; LP64E-NEXT: fsd ft2, 64(a1)
+; LP64E-NEXT: fsd ft1, 56(a1)
+; LP64E-NEXT: fsd ft0, 48(a1)
+; LP64E-NEXT: fsd fa0, 40(a1)
+; LP64E-NEXT: fsd fa1, 32(a1)
+; LP64E-NEXT: fsd fa2, 24(a1)
+; LP64E-NEXT: fsd fa3, 16(a1)
+; LP64E-NEXT: fsd fa4, %lo(var+8)(a0)
+; LP64E-NEXT: fsd fa5, %lo(var)(a0)
+; LP64E-NEXT: ret
+;
; ILP32D-LABEL: callee:
; ILP32D: # %bb.0:
; ILP32D-NEXT: addi sp, sp, -96
@@ -647,6 +719,149 @@ define void @caller() nounwind {
; LP64-NEXT: addi sp, sp, 288
; LP64-NEXT: ret
;
+; LP64E-LABEL: caller:
+; LP64E: # %bb.0:
+; LP64E-NEXT: addi sp, sp, -280
+; LP64E-NEXT: sd ra, 272(sp) # 8-byte Folded Spill
+; LP64E-NEXT: sd s0, 264(sp) # 8-byte Folded Spill
+; LP64E-NEXT: sd s1, 256(sp) # 8-byte Folded Spill
+; LP64E-NEXT: lui s0, %hi(var)
+; LP64E-NEXT: fld fa5, %lo(var)(s0)
+; LP64E-NEXT: fsd fa5, 248(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, %lo(var+8)(s0)
+; LP64E-NEXT: fsd fa5, 240(sp) # 8-byte Folded Spill
+; LP64E-NEXT: addi s1, s0, %lo(var)
+; LP64E-NEXT: fld fa5, 16(s1)
+; LP64E-NEXT: fsd fa5, 232(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 24(s1)
+; LP64E-NEXT: fsd fa5, 224(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 32(s1)
+; LP64E-NEXT: fsd fa5, 216(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 40(s1)
+; LP64E-NEXT: fsd fa5, 208(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 48(s1)
+; LP64E-NEXT: fsd fa5, 200(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 56(s1)
+; LP64E-NEXT: fsd fa5, 192(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 64(s1)
+; LP64E-NEXT: fsd fa5, 184(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 72(s1)
+; LP64E-NEXT: fsd fa5, 176(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 80(s1)
+; LP64E-NEXT: fsd fa5, 168(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 88(s1)
+; LP64E-NEXT: fsd fa5, 160(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 96(s1)
+; LP64E-NEXT: fsd fa5, 152(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 104(s1)
+; LP64E-NEXT: fsd fa5, 144(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 112(s1)
+; LP64E-NEXT: fsd fa5, 136(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 120(s1)
+; LP64E-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 128(s1)
+; LP64E-NEXT: fsd fa5, 120(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 136(s1)
+; LP64E-NEXT: fsd fa5, 112(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 144(s1)
+; LP64E-NEXT: fsd fa5, 104(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 152(s1)
+; LP64E-NEXT: fsd fa5, 96(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 160(s1)
+; LP64E-NEXT: fsd fa5, 88(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 168(s1)
+; LP64E-NEXT: fsd fa5, 80(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 176(s1)
+; LP64E-NEXT: fsd fa5, 72(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 184(s1)
+; LP64E-NEXT: fsd fa5, 64(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 192(s1)
+; LP64E-NEXT: fsd fa5, 56(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 200(s1)
+; LP64E-NEXT: fsd fa5, 48(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 208(s1)
+; LP64E-NEXT: fsd fa5, 40(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 216(s1)
+; LP64E-NEXT: fsd fa5, 32(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 224(s1)
+; LP64E-NEXT: fsd fa5, 24(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 232(s1)
+; LP64E-NEXT: fsd fa5, 16(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 240(s1)
+; LP64E-NEXT: fsd fa5, 8(sp) # 8-byte Folded Spill
+; LP64E-NEXT: fld fa5, 248(s1)
+; LP64E-NEXT: fsd fa5, 0(sp) # 8-byte Folded Spill
+; LP64E-NEXT: call callee
+; LP64E-NEXT: fld fa5, 0(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 248(s1)
+; LP64E-NEXT: fld fa5, 8(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 240(s1)
+; LP64E-NEXT: fld fa5, 16(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 232(s1)
+; LP64E-NEXT: fld fa5, 24(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 224(s1)
+; LP64E-NEXT: fld fa5, 32(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 216(s1)
+; LP64E-NEXT: fld fa5, 40(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 208(s1)
+; LP64E-NEXT: fld fa5, 48(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 200(s1)
+; LP64E-NEXT: fld fa5, 56(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 192(s1)
+; LP64E-NEXT: fld fa5, 64(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 184(s1)
+; LP64E-NEXT: fld fa5, 72(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 176(s1)
+; LP64E-NEXT: fld fa5, 80(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 168(s1)
+; LP64E-NEXT: fld fa5, 88(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 160(s1)
+; LP64E-NEXT: fld fa5, 96(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 152(s1)
+; LP64E-NEXT: fld fa5, 104(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 144(s1)
+; LP64E-NEXT: fld fa5, 112(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 136(s1)
+; LP64E-NEXT: fld fa5, 120(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 128(s1)
+; LP64E-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 120(s1)
+; LP64E-NEXT: fld fa5, 136(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 112(s1)
+; LP64E-NEXT: fld fa5, 144(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 104(s1)
+; LP64E-NEXT: fld fa5, 152(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 96(s1)
+; LP64E-NEXT: fld fa5, 160(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 88(s1)
+; LP64E-NEXT: fld fa5, 168(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 80(s1)
+; LP64E-NEXT: fld fa5, 176(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 72(s1)
+; LP64E-NEXT: fld fa5, 184(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 64(s1)
+; LP64E-NEXT: fld fa5, 192(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 56(s1)
+; LP64E-NEXT: fld fa5, 200(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 48(s1)
+; LP64E-NEXT: fld fa5, 208(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 40(s1)
+; LP64E-NEXT: fld fa5, 216(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 32(s1)
+; LP64E-NEXT: fld fa5, 224(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 24(s1)
+; LP64E-NEXT: fld fa5, 232(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, 16(s1)
+; LP64E-NEXT: fld fa5, 240(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, %lo(var+8)(s0)
+; LP64E-NEXT: fld fa5, 248(sp) # 8-byte Folded Reload
+; LP64E-NEXT: fsd fa5, %lo(var)(s0)
+; LP64E-NEXT: ld ra, 272(sp) # 8-byte Folded Reload
+; LP64E-NEXT: ld s0, 264(sp) # 8-byte Folded Reload
+; LP64E-NEXT: ld s1, 256(sp) # 8-byte Folded Reload
+; LP64E-NEXT: addi sp, sp, 280
+; LP64E-NEXT: ret
+;
; ILP32D-LABEL: caller:
; ILP32D: # %bb.0:
; ILP32D-NEXT: addi sp, sp, -272
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
index 6303a1245677986..710b602df6d0233 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E
; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32f -verify-machineinstrs < %s \
@@ -15,6 +17,8 @@
; RUN: -frame-pointer=all < %s | FileCheck %s -check-prefixes=RV32IZCMP-WITH-FP
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I-LP64E
; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64f -verify-machineinstrs < %s \
@@ -144,6 +148,96 @@ define void @callee() nounwind {
; RV32I-NEXT: addi sp, sp, 80
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: callee:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -48
+; RV32I-ILP32E-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lui a6, %hi(var)
+; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a6)
+; RV32I-ILP32E-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, %lo(var+4)(a6)
+; RV32I-ILP32E-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a6)
+; RV32I-ILP32E-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a6)
+; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: addi a5, a6, %lo(var)
+; RV32I-ILP32E-NEXT: lw a0, 16(a5)
+; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 20(a5)
+; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw t0, 24(a5)
+; RV32I-ILP32E-NEXT: lw t1, 28(a5)
+; RV32I-ILP32E-NEXT: lw t2, 32(a5)
+; RV32I-ILP32E-NEXT: lw t3, 36(a5)
+; RV32I-ILP32E-NEXT: lw t4, 40(a5)
+; RV32I-ILP32E-NEXT: lw t5, 44(a5)
+; RV32I-ILP32E-NEXT: lw t6, 48(a5)
+; RV32I-ILP32E-NEXT: lw s2, 52(a5)
+; RV32I-ILP32E-NEXT: lw s3, 56(a5)
+; RV32I-ILP32E-NEXT: lw s4, 60(a5)
+; RV32I-ILP32E-NEXT: lw s5, 64(a5)
+; RV32I-ILP32E-NEXT: lw s6, 68(a5)
+; RV32I-ILP32E-NEXT: lw s7, 72(a5)
+; RV32I-ILP32E-NEXT: lw s8, 76(a5)
+; RV32I-ILP32E-NEXT: lw s9, 80(a5)
+; RV32I-ILP32E-NEXT: lw s10, 84(a5)
+; RV32I-ILP32E-NEXT: lw s11, 88(a5)
+; RV32I-ILP32E-NEXT: lw s0, 92(a5)
+; RV32I-ILP32E-NEXT: lw s1, 96(a5)
+; RV32I-ILP32E-NEXT: lw ra, 100(a5)
+; RV32I-ILP32E-NEXT: lw a7, 104(a5)
+; RV32I-ILP32E-NEXT: lw a4, 108(a5)
+; RV32I-ILP32E-NEXT: lw a0, 124(a5)
+; RV32I-ILP32E-NEXT: lw a1, 120(a5)
+; RV32I-ILP32E-NEXT: lw a2, 116(a5)
+; RV32I-ILP32E-NEXT: lw a3, 112(a5)
+; RV32I-ILP32E-NEXT: sw a0, 124(a5)
+; RV32I-ILP32E-NEXT: sw a1, 120(a5)
+; RV32I-ILP32E-NEXT: sw a2, 116(a5)
+; RV32I-ILP32E-NEXT: sw a3, 112(a5)
+; RV32I-ILP32E-NEXT: sw a4, 108(a5)
+; RV32I-ILP32E-NEXT: sw a7, 104(a5)
+; RV32I-ILP32E-NEXT: sw ra, 100(a5)
+; RV32I-ILP32E-NEXT: sw s1, 96(a5)
+; RV32I-ILP32E-NEXT: sw s0, 92(a5)
+; RV32I-ILP32E-NEXT: sw s11, 88(a5)
+; RV32I-ILP32E-NEXT: sw s10, 84(a5)
+; RV32I-ILP32E-NEXT: sw s9, 80(a5)
+; RV32I-ILP32E-NEXT: sw s8, 76(a5)
+; RV32I-ILP32E-NEXT: sw s7, 72(a5)
+; RV32I-ILP32E-NEXT: sw s6, 68(a5)
+; RV32I-ILP32E-NEXT: sw s5, 64(a5)
+; RV32I-ILP32E-NEXT: sw s4, 60(a5)
+; RV32I-ILP32E-NEXT: sw s3, 56(a5)
+; RV32I-ILP32E-NEXT: sw s2, 52(a5)
+; RV32I-ILP32E-NEXT: sw t6, 48(a5)
+; RV32I-ILP32E-NEXT: sw t5, 44(a5)
+; RV32I-ILP32E-NEXT: sw t4, 40(a5)
+; RV32I-ILP32E-NEXT: sw t3, 36(a5)
+; RV32I-ILP32E-NEXT: sw t2, 32(a5)
+; RV32I-ILP32E-NEXT: sw t1, 28(a5)
+; RV32I-ILP32E-NEXT: sw t0, 24(a5)
+; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 20(a5)
+; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 16(a5)
+; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a6)
+; RV32I-ILP32E-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a6)
+; RV32I-ILP32E-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a6)
+; RV32I-ILP32E-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a6)
+; RV32I-ILP32E-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 48
+; RV32I-ILP32E-NEXT: ret
+;
; RV32I-WITH-FP-LABEL: callee:
; RV32I-WITH-FP: # %bb.0:
; RV32I-WITH-FP-NEXT: addi sp, sp, -80
@@ -563,6 +657,96 @@ define void @callee() nounwind {
; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
+; RV64I-LP64E-LABEL: callee:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -80
+; RV64I-LP64E-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lui a6, %hi(var)
+; RV64I-LP64E-NEXT: lw a0, %lo(var)(a6)
+; RV64I-LP64E-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, %lo(var+4)(a6)
+; RV64I-LP64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, %lo(var+8)(a6)
+; RV64I-LP64E-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, %lo(var+12)(a6)
+; RV64I-LP64E-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: addi a5, a6, %lo(var)
+; RV64I-LP64E-NEXT: lw a0, 16(a5)
+; RV64I-LP64E-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 20(a5)
+; RV64I-LP64E-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw t0, 24(a5)
+; RV64I-LP64E-NEXT: lw t1, 28(a5)
+; RV64I-LP64E-NEXT: lw t2, 32(a5)
+; RV64I-LP64E-NEXT: lw t3, 36(a5)
+; RV64I-LP64E-NEXT: lw t4, 40(a5)
+; RV64I-LP64E-NEXT: lw t5, 44(a5)
+; RV64I-LP64E-NEXT: lw t6, 48(a5)
+; RV64I-LP64E-NEXT: lw s2, 52(a5)
+; RV64I-LP64E-NEXT: lw s3, 56(a5)
+; RV64I-LP64E-NEXT: lw s4, 60(a5)
+; RV64I-LP64E-NEXT: lw s5, 64(a5)
+; RV64I-LP64E-NEXT: lw s6, 68(a5)
+; RV64I-LP64E-NEXT: lw s7, 72(a5)
+; RV64I-LP64E-NEXT: lw s8, 76(a5)
+; RV64I-LP64E-NEXT: lw s9, 80(a5)
+; RV64I-LP64E-NEXT: lw s10, 84(a5)
+; RV64I-LP64E-NEXT: lw s11, 88(a5)
+; RV64I-LP64E-NEXT: lw s0, 92(a5)
+; RV64I-LP64E-NEXT: lw s1, 96(a5)
+; RV64I-LP64E-NEXT: lw ra, 100(a5)
+; RV64I-LP64E-NEXT: lw a7, 104(a5)
+; RV64I-LP64E-NEXT: lw a4, 108(a5)
+; RV64I-LP64E-NEXT: lw a0, 124(a5)
+; RV64I-LP64E-NEXT: lw a1, 120(a5)
+; RV64I-LP64E-NEXT: lw a2, 116(a5)
+; RV64I-LP64E-NEXT: lw a3, 112(a5)
+; RV64I-LP64E-NEXT: sw a0, 124(a5)
+; RV64I-LP64E-NEXT: sw a1, 120(a5)
+; RV64I-LP64E-NEXT: sw a2, 116(a5)
+; RV64I-LP64E-NEXT: sw a3, 112(a5)
+; RV64I-LP64E-NEXT: sw a4, 108(a5)
+; RV64I-LP64E-NEXT: sw a7, 104(a5)
+; RV64I-LP64E-NEXT: sw ra, 100(a5)
+; RV64I-LP64E-NEXT: sw s1, 96(a5)
+; RV64I-LP64E-NEXT: sw s0, 92(a5)
+; RV64I-LP64E-NEXT: sw s11, 88(a5)
+; RV64I-LP64E-NEXT: sw s10, 84(a5)
+; RV64I-LP64E-NEXT: sw s9, 80(a5)
+; RV64I-LP64E-NEXT: sw s8, 76(a5)
+; RV64I-LP64E-NEXT: sw s7, 72(a5)
+; RV64I-LP64E-NEXT: sw s6, 68(a5)
+; RV64I-LP64E-NEXT: sw s5, 64(a5)
+; RV64I-LP64E-NEXT: sw s4, 60(a5)
+; RV64I-LP64E-NEXT: sw s3, 56(a5)
+; RV64I-LP64E-NEXT: sw s2, 52(a5)
+; RV64I-LP64E-NEXT: sw t6, 48(a5)
+; RV64I-LP64E-NEXT: sw t5, 44(a5)
+; RV64I-LP64E-NEXT: sw t4, 40(a5)
+; RV64I-LP64E-NEXT: sw t3, 36(a5)
+; RV64I-LP64E-NEXT: sw t2, 32(a5)
+; RV64I-LP64E-NEXT: sw t1, 28(a5)
+; RV64I-LP64E-NEXT: sw t0, 24(a5)
+; RV64I-LP64E-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 20(a5)
+; RV64I-LP64E-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 16(a5)
+; RV64I-LP64E-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a6)
+; RV64I-LP64E-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a6)
+; RV64I-LP64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a6)
+; RV64I-LP64E-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, %lo(var)(a6)
+; RV64I-LP64E-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 80
+; RV64I-LP64E-NEXT: ret
+;
; RV64I-WITH-FP-LABEL: callee:
; RV64I-WITH-FP: # %bb.0:
; RV64I-WITH-FP-NEXT: addi sp, sp, -160
@@ -1023,6 +1207,148 @@ define void @caller() nounwind {
; RV32I-NEXT: addi sp, sp, 144
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -136
+; RV32I-ILP32E-NEXT: sw ra, 132(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 128(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s1, 124(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lui a0, %hi(var)
+; RV32I-ILP32E-NEXT: lw a1, %lo(var)(a0)
+; RV32I-ILP32E-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a1, %lo(var+4)(a0)
+; RV32I-ILP32E-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a1, %lo(var+8)(a0)
+; RV32I-ILP32E-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a1, %lo(var+12)(a0)
+; RV32I-ILP32E-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: addi s1, a0, %lo(var)
+; RV32I-ILP32E-NEXT: lw a0, 16(s1)
+; RV32I-ILP32E-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 20(s1)
+; RV32I-ILP32E-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 24(s1)
+; RV32I-ILP32E-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 28(s1)
+; RV32I-ILP32E-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 32(s1)
+; RV32I-ILP32E-NEXT: sw a0, 88(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 36(s1)
+; RV32I-ILP32E-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 40(s1)
+; RV32I-ILP32E-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 44(s1)
+; RV32I-ILP32E-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 48(s1)
+; RV32I-ILP32E-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 52(s1)
+; RV32I-ILP32E-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 56(s1)
+; RV32I-ILP32E-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 60(s1)
+; RV32I-ILP32E-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 64(s1)
+; RV32I-ILP32E-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 68(s1)
+; RV32I-ILP32E-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 72(s1)
+; RV32I-ILP32E-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 76(s1)
+; RV32I-ILP32E-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 80(s1)
+; RV32I-ILP32E-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 84(s1)
+; RV32I-ILP32E-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 88(s1)
+; RV32I-ILP32E-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 92(s1)
+; RV32I-ILP32E-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 96(s1)
+; RV32I-ILP32E-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 100(s1)
+; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 104(s1)
+; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 108(s1)
+; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 112(s1)
+; RV32I-ILP32E-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 116(s1)
+; RV32I-ILP32E-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw a0, 120(s1)
+; RV32I-ILP32E-NEXT: sw a0, 0(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: lw s0, 124(s1)
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: sw s0, 124(s1)
+; RV32I-ILP32E-NEXT: lw a0, 0(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 120(s1)
+; RV32I-ILP32E-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 116(s1)
+; RV32I-ILP32E-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 112(s1)
+; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 108(s1)
+; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 104(s1)
+; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 100(s1)
+; RV32I-ILP32E-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 96(s1)
+; RV32I-ILP32E-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 92(s1)
+; RV32I-ILP32E-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 88(s1)
+; RV32I-ILP32E-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 84(s1)
+; RV32I-ILP32E-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 80(s1)
+; RV32I-ILP32E-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 76(s1)
+; RV32I-ILP32E-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 72(s1)
+; RV32I-ILP32E-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 68(s1)
+; RV32I-ILP32E-NEXT: lw a0, 56(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 64(s1)
+; RV32I-ILP32E-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 60(s1)
+; RV32I-ILP32E-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 56(s1)
+; RV32I-ILP32E-NEXT: lw a0, 68(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 52(s1)
+; RV32I-ILP32E-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 48(s1)
+; RV32I-ILP32E-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 44(s1)
+; RV32I-ILP32E-NEXT: lw a0, 80(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 40(s1)
+; RV32I-ILP32E-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 36(s1)
+; RV32I-ILP32E-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 32(s1)
+; RV32I-ILP32E-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 28(s1)
+; RV32I-ILP32E-NEXT: lw a0, 96(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 24(s1)
+; RV32I-ILP32E-NEXT: lw a0, 100(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 20(s1)
+; RV32I-ILP32E-NEXT: lw a0, 104(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, 16(s1)
+; RV32I-ILP32E-NEXT: lui a1, %hi(var)
+; RV32I-ILP32E-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a1)
+; RV32I-ILP32E-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a1)
+; RV32I-ILP32E-NEXT: lw a0, 116(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a1)
+; RV32I-ILP32E-NEXT: lw a0, 120(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a1)
+; RV32I-ILP32E-NEXT: lw ra, 132(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 128(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s1, 124(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 136
+; RV32I-ILP32E-NEXT: ret
+;
; RV32I-WITH-FP-LABEL: caller:
; RV32I-WITH-FP: # %bb.0:
; RV32I-WITH-FP-NEXT: addi sp, sp, -144
@@ -1576,6 +1902,148 @@ define void @caller() nounwind {
; RV64I-NEXT: addi sp, sp, 288
; RV64I-NEXT: ret
;
+; RV64I-LP64E-LABEL: caller:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -272
+; RV64I-LP64E-NEXT: sd ra, 264(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 256(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s1, 248(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lui a0, %hi(var)
+; RV64I-LP64E-NEXT: lw a1, %lo(var)(a0)
+; RV64I-LP64E-NEXT: sd a1, 240(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a1, %lo(var+4)(a0)
+; RV64I-LP64E-NEXT: sd a1, 232(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a1, %lo(var+8)(a0)
+; RV64I-LP64E-NEXT: sd a1, 224(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a1, %lo(var+12)(a0)
+; RV64I-LP64E-NEXT: sd a1, 216(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: addi s1, a0, %lo(var)
+; RV64I-LP64E-NEXT: lw a0, 16(s1)
+; RV64I-LP64E-NEXT: sd a0, 208(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 20(s1)
+; RV64I-LP64E-NEXT: sd a0, 200(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 24(s1)
+; RV64I-LP64E-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 28(s1)
+; RV64I-LP64E-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 32(s1)
+; RV64I-LP64E-NEXT: sd a0, 176(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 36(s1)
+; RV64I-LP64E-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 40(s1)
+; RV64I-LP64E-NEXT: sd a0, 160(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 44(s1)
+; RV64I-LP64E-NEXT: sd a0, 152(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 48(s1)
+; RV64I-LP64E-NEXT: sd a0, 144(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 52(s1)
+; RV64I-LP64E-NEXT: sd a0, 136(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 56(s1)
+; RV64I-LP64E-NEXT: sd a0, 128(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 60(s1)
+; RV64I-LP64E-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 64(s1)
+; RV64I-LP64E-NEXT: sd a0, 112(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 68(s1)
+; RV64I-LP64E-NEXT: sd a0, 104(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 72(s1)
+; RV64I-LP64E-NEXT: sd a0, 96(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 76(s1)
+; RV64I-LP64E-NEXT: sd a0, 88(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 80(s1)
+; RV64I-LP64E-NEXT: sd a0, 80(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 84(s1)
+; RV64I-LP64E-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 88(s1)
+; RV64I-LP64E-NEXT: sd a0, 64(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 92(s1)
+; RV64I-LP64E-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 96(s1)
+; RV64I-LP64E-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 100(s1)
+; RV64I-LP64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 104(s1)
+; RV64I-LP64E-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 108(s1)
+; RV64I-LP64E-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 112(s1)
+; RV64I-LP64E-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 116(s1)
+; RV64I-LP64E-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw a0, 120(s1)
+; RV64I-LP64E-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: lw s0, 124(s1)
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: sw s0, 124(s1)
+; RV64I-LP64E-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 120(s1)
+; RV64I-LP64E-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 116(s1)
+; RV64I-LP64E-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 112(s1)
+; RV64I-LP64E-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 108(s1)
+; RV64I-LP64E-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 104(s1)
+; RV64I-LP64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 100(s1)
+; RV64I-LP64E-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 96(s1)
+; RV64I-LP64E-NEXT: ld a0, 56(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 92(s1)
+; RV64I-LP64E-NEXT: ld a0, 64(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 88(s1)
+; RV64I-LP64E-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 84(s1)
+; RV64I-LP64E-NEXT: ld a0, 80(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 80(s1)
+; RV64I-LP64E-NEXT: ld a0, 88(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 76(s1)
+; RV64I-LP64E-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 72(s1)
+; RV64I-LP64E-NEXT: ld a0, 104(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 68(s1)
+; RV64I-LP64E-NEXT: ld a0, 112(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 64(s1)
+; RV64I-LP64E-NEXT: ld a0, 120(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 60(s1)
+; RV64I-LP64E-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 56(s1)
+; RV64I-LP64E-NEXT: ld a0, 136(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 52(s1)
+; RV64I-LP64E-NEXT: ld a0, 144(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 48(s1)
+; RV64I-LP64E-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 44(s1)
+; RV64I-LP64E-NEXT: ld a0, 160(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 40(s1)
+; RV64I-LP64E-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 36(s1)
+; RV64I-LP64E-NEXT: ld a0, 176(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 32(s1)
+; RV64I-LP64E-NEXT: ld a0, 184(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 28(s1)
+; RV64I-LP64E-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 24(s1)
+; RV64I-LP64E-NEXT: ld a0, 200(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 20(s1)
+; RV64I-LP64E-NEXT: ld a0, 208(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, 16(s1)
+; RV64I-LP64E-NEXT: lui a1, %hi(var)
+; RV64I-LP64E-NEXT: ld a0, 216(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a1)
+; RV64I-LP64E-NEXT: ld a0, 224(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a1)
+; RV64I-LP64E-NEXT: ld a0, 232(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a1)
+; RV64I-LP64E-NEXT: ld a0, 240(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: sw a0, %lo(var)(a1)
+; RV64I-LP64E-NEXT: ld ra, 264(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 256(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s1, 248(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 272
+; RV64I-LP64E-NEXT: ret
+;
; RV64I-WITH-FP-LABEL: caller:
; RV64I-WITH-FP: # %bb.0:
; RV64I-WITH-FP-NEXT: addi sp, sp, -288
@@ -2007,6 +2475,13 @@ define void @foo() {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: foo:
+; RV32I-ILP32E: # %bb.0: # %entry
+; RV32I-ILP32E-NEXT: #APP
+; RV32I-ILP32E-NEXT: li s4, 0
+; RV32I-ILP32E-NEXT: #NO_APP
+; RV32I-ILP32E-NEXT: ret
+;
; RV32I-WITH-FP-LABEL: foo:
; RV32I-WITH-FP: # %bb.0: # %entry
; RV32I-WITH-FP-NEXT: addi sp, sp, -16
@@ -2072,6 +2547,13 @@ define void @foo() {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-LP64E-LABEL: foo:
+; RV64I-LP64E: # %bb.0: # %entry
+; RV64I-LP64E-NEXT: #APP
+; RV64I-LP64E-NEXT: li s4, 0
+; RV64I-LP64E-NEXT: #NO_APP
+; RV64I-LP64E-NEXT: ret
+;
; RV64I-WITH-FP-LABEL: foo:
; RV64I-WITH-FP: # %bb.0: # %entry
; RV64I-WITH-FP-NEXT: addi sp, sp, -32
@@ -2143,6 +2625,13 @@ define void @bar() {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: bar:
+; RV32I-ILP32E: # %bb.0: # %entry
+; RV32I-ILP32E-NEXT: #APP
+; RV32I-ILP32E-NEXT: li s11, 0
+; RV32I-ILP32E-NEXT: #NO_APP
+; RV32I-ILP32E-NEXT: ret
+;
; RV32I-WITH-FP-LABEL: bar:
; RV32I-WITH-FP: # %bb.0: # %entry
; RV32I-WITH-FP-NEXT: addi sp, sp, -16
@@ -2208,6 +2697,13 @@ define void @bar() {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-LP64E-LABEL: bar:
+; RV64I-LP64E: # %bb.0: # %entry
+; RV64I-LP64E-NEXT: #APP
+; RV64I-LP64E-NEXT: li s11, 0
+; RV64I-LP64E-NEXT: #NO_APP
+; RV64I-LP64E-NEXT: ret
+;
; RV64I-WITH-FP-LABEL: bar:
; RV64I-WITH-FP: # %bb.0: # %entry
; RV64I-WITH-FP-NEXT: addi sp, sp, -32
@@ -2284,6 +2780,23 @@ define void @varargs(...) {
; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: varargs:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -28
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 28
+; RV32I-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -28
+; RV32I-ILP32E-NEXT: sw a5, 24(sp)
+; RV32I-ILP32E-NEXT: sw a4, 20(sp)
+; RV32I-ILP32E-NEXT: sw a3, 16(sp)
+; RV32I-ILP32E-NEXT: sw a2, 12(sp)
+; RV32I-ILP32E-NEXT: sw a1, 8(sp)
+; RV32I-ILP32E-NEXT: sw a0, 4(sp)
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 28
+; RV32I-ILP32E-NEXT: ret
+;
; RV32I-WITH-FP-LABEL: varargs:
; RV32I-WITH-FP: # %bb.0:
; RV32I-WITH-FP-NEXT: addi sp, sp, -48
@@ -2370,6 +2883,23 @@ define void @varargs(...) {
; RV64I-NEXT: addi sp, sp, 80
; RV64I-NEXT: ret
;
+; RV64I-LP64E-LABEL: varargs:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -56
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 56
+; RV64I-LP64E-NEXT: sd ra, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -56
+; RV64I-LP64E-NEXT: sd a5, 48(sp)
+; RV64I-LP64E-NEXT: sd a4, 40(sp)
+; RV64I-LP64E-NEXT: sd a3, 32(sp)
+; RV64I-LP64E-NEXT: sd a2, 24(sp)
+; RV64I-LP64E-NEXT: sd a1, 16(sp)
+; RV64I-LP64E-NEXT: sd a0, 8(sp)
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: ld ra, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 56
+; RV64I-LP64E-NEXT: ret
+;
; RV64I-WITH-FP-LABEL: varargs:
; RV64I-WITH-FP: # %bb.0:
; RV64I-WITH-FP-NEXT: addi sp, sp, -80
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll
new file mode 100644
index 000000000000000..01a47c18c7a483c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll
@@ -0,0 +1,2549 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=ILP32E-FPELIM %s
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+save-restore -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=ILP32E-FPELIM-SAVE-RESTORE %s
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+save-restore -frame-pointer=all \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=ILP32E-WITHFP-SAVE-RESTORE %s
+
+; This file contains tests that will have differing output for the ilp32e ABIs.
+
+define i32 @callee_float_in_regs(i32 %a, float %b) {
+; ILP32E-FPELIM-LABEL: callee_float_in_regs:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -8
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-NEXT: mv s0, a0
+; ILP32E-FPELIM-NEXT: mv a0, a1
+; ILP32E-FPELIM-NEXT: call __fixsfsi
+; ILP32E-FPELIM-NEXT: add a0, s0, a0
+; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 8
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_float_in_regs:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -12
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12
+; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12
+; ILP32E-WITHFP-NEXT: addi s0, sp, 12
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: mv s1, a0
+; ILP32E-WITHFP-NEXT: mv a0, a1
+; ILP32E-WITHFP-NEXT: call __fixsfsi
+; ILP32E-WITHFP-NEXT: add a0, s1, a0
+; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 12
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_float_in_regs:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv s0, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call __fixsfsi
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, s0, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_float_in_regs:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 12
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s1, -12
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 12
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv s1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call __fixsfsi
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, s1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_2
+ %b_fptosi = fptosi float %b to i32
+ %1 = add i32 %a, %b_fptosi
+ ret i32 %1
+}
+
+define i32 @caller_float_in_regs() {
+; ILP32E-FPELIM-LABEL: caller_float_in_regs:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -4
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: lui a1, 262144
+; ILP32E-FPELIM-NEXT: call callee_float_in_regs
+; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_float_in_regs:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: lui a1, 262144
+; ILP32E-WITHFP-NEXT: call callee_float_in_regs
+; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 8
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_float_in_regs:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 262144
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_float_in_regs
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_float_in_regs:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 262144
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_float_in_regs
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call i32 @callee_float_in_regs(i32 1, float 2.0)
+ ret i32 %1
+}
+
+define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) {
+; ILP32E-FPELIM-LABEL: callee_float_on_stack:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: lw a0, 8(sp)
+; ILP32E-FPELIM-NEXT: lw a1, 0(sp)
+; ILP32E-FPELIM-NEXT: add a0, a1, a0
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_float_on_stack:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: lw a0, 8(s0)
+; ILP32E-WITHFP-NEXT: lw a1, 0(s0)
+; ILP32E-WITHFP-NEXT: add a0, a1, a0
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_float_on_stack:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_float_on_stack:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 0(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = trunc i64 %d to i32
+ %2 = bitcast float %e to i32
+ %3 = add i32 %1, %2
+ ret i32 %3
+}
+
+define i32 @caller_float_on_stack() {
+; ILP32E-FPELIM-LABEL: caller_float_on_stack:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -16
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: lui a0, 264704
+; ILP32E-FPELIM-NEXT: sw a0, 8(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 4(sp)
+; ILP32E-FPELIM-NEXT: li a1, 4
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: li a2, 2
+; ILP32E-FPELIM-NEXT: li a4, 3
+; ILP32E-FPELIM-NEXT: sw a1, 0(sp)
+; ILP32E-FPELIM-NEXT: li a1, 0
+; ILP32E-FPELIM-NEXT: li a3, 0
+; ILP32E-FPELIM-NEXT: li a5, 0
+; ILP32E-FPELIM-NEXT: call callee_float_on_stack
+; ILP32E-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 16
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_float_on_stack:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -20
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 20
+; ILP32E-WITHFP-NEXT: sw ra, 16(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 20
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: lui a0, 264704
+; ILP32E-WITHFP-NEXT: sw a0, 8(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 4(sp)
+; ILP32E-WITHFP-NEXT: li a1, 4
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: li a2, 2
+; ILP32E-WITHFP-NEXT: li a4, 3
+; ILP32E-WITHFP-NEXT: sw a1, 0(sp)
+; ILP32E-WITHFP-NEXT: li a1, 0
+; ILP32E-WITHFP-NEXT: li a3, 0
+; ILP32E-WITHFP-NEXT: li a5, 0
+; ILP32E-WITHFP-NEXT: call callee_float_on_stack
+; ILP32E-WITHFP-NEXT: lw ra, 16(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 20
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_float_on_stack:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -12
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 264704
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 3
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_float_on_stack
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 12
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_float_on_stack:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -12
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 20
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 20
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 264704
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 3
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_float_on_stack
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 12
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0)
+ ret i32 %1
+}
+
+define float @callee_tiny_scalar_ret() {
+; ILP32E-FPELIM-LABEL: callee_tiny_scalar_ret:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: lui a0, 260096
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_tiny_scalar_ret:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: lui a0, 260096
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_tiny_scalar_ret:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 260096
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_tiny_scalar_ret:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 260096
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ ret float 1.0
+}
+
+define i32 @caller_tiny_scalar_ret() {
+; ILP32E-FPELIM-LABEL: caller_tiny_scalar_ret:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -4
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: call callee_tiny_scalar_ret
+; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_tiny_scalar_ret:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: call callee_tiny_scalar_ret
+; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 8
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_tiny_scalar_ret:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_tiny_scalar_ret
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_tiny_scalar_ret:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_tiny_scalar_ret
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call float @callee_tiny_scalar_ret()
+ %2 = bitcast float %1 to i32
+ ret i32 %2
+}
+
+; Check that on RV32 ilp32e, double is passed in a pair of registers. Unlike
+; the convention for varargs, this need not be an aligned pair.
+
+define i32 @callee_double_in_regs(i32 %a, double %b) {
+; ILP32E-FPELIM-LABEL: callee_double_in_regs:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -8
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-NEXT: mv s0, a0
+; ILP32E-FPELIM-NEXT: mv a0, a1
+; ILP32E-FPELIM-NEXT: mv a1, a2
+; ILP32E-FPELIM-NEXT: call __fixdfsi
+; ILP32E-FPELIM-NEXT: add a0, s0, a0
+; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 8
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_double_in_regs:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -12
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12
+; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12
+; ILP32E-WITHFP-NEXT: addi s0, sp, 12
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: mv s1, a0
+; ILP32E-WITHFP-NEXT: mv a0, a1
+; ILP32E-WITHFP-NEXT: mv a1, a2
+; ILP32E-WITHFP-NEXT: call __fixdfsi
+; ILP32E-WITHFP-NEXT: add a0, s1, a0
+; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 12
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_double_in_regs:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv s0, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, a2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call __fixdfsi
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, s0, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_double_in_regs:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 12
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s1, -12
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 12
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv s1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, a2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call __fixdfsi
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, s1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_2
+ %b_fptosi = fptosi double %b to i32
+ %1 = add i32 %a, %b_fptosi
+ ret i32 %1
+}
+
+define i32 @caller_double_in_regs() {
+; ILP32E-FPELIM-LABEL: caller_double_in_regs:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -4
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: lui a2, 262144
+; ILP32E-FPELIM-NEXT: li a1, 0
+; ILP32E-FPELIM-NEXT: call callee_double_in_regs
+; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_double_in_regs:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: lui a2, 262144
+; ILP32E-WITHFP-NEXT: li a1, 0
+; ILP32E-WITHFP-NEXT: call callee_double_in_regs
+; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 8
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_double_in_regs:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a2, 262144
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_double_in_regs
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_double_in_regs:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a2, 262144
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_double_in_regs
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call i32 @callee_double_in_regs(i32 1, double 2.0)
+ ret i32 %1
+}
+
+; Check that 2*xlen values are aligned appropriately when passed on the stack
+; Must keep define on a single line due to an update_llc_test_checks.py limitation
+define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %f, i32 %g, i32 %h, double %i, i32 %j, [2 x i32] %k) {
+; Under ilp32e the double is only 4-byte aligned on the stack (it is read from
+; 12(sp)), and the two-element array is likewise only 4-byte aligned
+; ILP32E-FPELIM-LABEL: callee_aligned_stack:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: lw a0, 0(a2)
+; ILP32E-FPELIM-NEXT: lw a1, 12(sp)
+; ILP32E-FPELIM-NEXT: lw a2, 4(sp)
+; ILP32E-FPELIM-NEXT: lw a3, 8(sp)
+; ILP32E-FPELIM-NEXT: lw a4, 24(sp)
+; ILP32E-FPELIM-NEXT: lw a5, 20(sp)
+; ILP32E-FPELIM-NEXT: add a0, a0, a2
+; ILP32E-FPELIM-NEXT: add a1, a3, a1
+; ILP32E-FPELIM-NEXT: add a0, a0, a1
+; ILP32E-FPELIM-NEXT: add a4, a5, a4
+; ILP32E-FPELIM-NEXT: add a0, a0, a4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_aligned_stack:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: lw a0, 0(a2)
+; ILP32E-WITHFP-NEXT: lw a1, 12(s0)
+; ILP32E-WITHFP-NEXT: lw a2, 4(s0)
+; ILP32E-WITHFP-NEXT: lw a3, 8(s0)
+; ILP32E-WITHFP-NEXT: lw a4, 24(s0)
+; ILP32E-WITHFP-NEXT: lw a5, 20(s0)
+; ILP32E-WITHFP-NEXT: add a0, a0, a2
+; ILP32E-WITHFP-NEXT: add a1, a3, a1
+; ILP32E-WITHFP-NEXT: add a0, a0, a1
+; ILP32E-WITHFP-NEXT: add a4, a5, a4
+; ILP32E-WITHFP-NEXT: add a0, a0, a4
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_aligned_stack:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(a2)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 24(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 20(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a1, a3, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a4, a5, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_aligned_stack:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(a2)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 4(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 8(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 24(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 20(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a1, a3, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a4, a5, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = bitcast fp128 %c to i128
+ %2 = trunc i128 %1 to i32
+ %3 = add i32 %2, %g
+ %4 = add i32 %3, %h
+ %5 = bitcast double %i to i64
+ %6 = trunc i64 %5 to i32
+ %7 = add i32 %4, %6
+ %8 = add i32 %7, %j
+ %9 = extractvalue [2 x i32] %k, 0
+ %10 = add i32 %8, %9
+ ret i32 %10
+}
+
+define void @caller_aligned_stack() {
+; Under ilp32e the double is only 4-byte aligned on the stack (it is stored at
+; 12(sp)/16(sp)), and the two-element array is likewise only 4-byte aligned
+; ILP32E-FPELIM-LABEL: caller_aligned_stack:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -64
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 64
+; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-NEXT: addi s0, sp, 64
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-NEXT: andi sp, sp, -16
+; ILP32E-FPELIM-NEXT: li a0, 18
+; ILP32E-FPELIM-NEXT: sw a0, 28(sp)
+; ILP32E-FPELIM-NEXT: li a0, 17
+; ILP32E-FPELIM-NEXT: sw a0, 24(sp)
+; ILP32E-FPELIM-NEXT: li a0, 16
+; ILP32E-FPELIM-NEXT: sw a0, 20(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 262236
+; ILP32E-FPELIM-NEXT: addi a0, a0, 655
+; ILP32E-FPELIM-NEXT: sw a0, 16(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 377487
+; ILP32E-FPELIM-NEXT: addi a0, a0, 1475
+; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
+; ILP32E-FPELIM-NEXT: li a0, 15
+; ILP32E-FPELIM-NEXT: sw a0, 8(sp)
+; ILP32E-FPELIM-NEXT: li a0, 14
+; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-NEXT: li a0, 4
+; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 262153
+; ILP32E-FPELIM-NEXT: addi a0, a0, 491
+; ILP32E-FPELIM-NEXT: sw a0, 44(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 545260
+; ILP32E-FPELIM-NEXT: addi a0, a0, -1967
+; ILP32E-FPELIM-NEXT: sw a0, 40(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 964690
+; ILP32E-FPELIM-NEXT: addi a0, a0, -328
+; ILP32E-FPELIM-NEXT: sw a0, 36(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 335544
+; ILP32E-FPELIM-NEXT: addi a6, a0, 1311
+; ILP32E-FPELIM-NEXT: lui a0, 688509
+; ILP32E-FPELIM-NEXT: addi a5, a0, -2048
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: li a1, 11
+; ILP32E-FPELIM-NEXT: addi a2, sp, 32
+; ILP32E-FPELIM-NEXT: li a3, 12
+; ILP32E-FPELIM-NEXT: li a4, 13
+; ILP32E-FPELIM-NEXT: sw a6, 32(sp)
+; ILP32E-FPELIM-NEXT: call callee_aligned_stack
+; ILP32E-FPELIM-NEXT: addi sp, s0, -64
+; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 64
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_aligned_stack:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -64
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 64
+; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 64
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: andi sp, sp, -16
+; ILP32E-WITHFP-NEXT: li a0, 18
+; ILP32E-WITHFP-NEXT: sw a0, 28(sp)
+; ILP32E-WITHFP-NEXT: li a0, 17
+; ILP32E-WITHFP-NEXT: sw a0, 24(sp)
+; ILP32E-WITHFP-NEXT: li a0, 16
+; ILP32E-WITHFP-NEXT: sw a0, 20(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 262236
+; ILP32E-WITHFP-NEXT: addi a0, a0, 655
+; ILP32E-WITHFP-NEXT: sw a0, 16(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 377487
+; ILP32E-WITHFP-NEXT: addi a0, a0, 1475
+; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
+; ILP32E-WITHFP-NEXT: li a0, 15
+; ILP32E-WITHFP-NEXT: sw a0, 8(sp)
+; ILP32E-WITHFP-NEXT: li a0, 14
+; ILP32E-WITHFP-NEXT: sw a0, 4(sp)
+; ILP32E-WITHFP-NEXT: li a0, 4
+; ILP32E-WITHFP-NEXT: sw a0, 0(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 262153
+; ILP32E-WITHFP-NEXT: addi a0, a0, 491
+; ILP32E-WITHFP-NEXT: sw a0, 44(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 545260
+; ILP32E-WITHFP-NEXT: addi a0, a0, -1967
+; ILP32E-WITHFP-NEXT: sw a0, 40(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 964690
+; ILP32E-WITHFP-NEXT: addi a0, a0, -328
+; ILP32E-WITHFP-NEXT: sw a0, 36(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 335544
+; ILP32E-WITHFP-NEXT: addi a6, a0, 1311
+; ILP32E-WITHFP-NEXT: lui a0, 688509
+; ILP32E-WITHFP-NEXT: addi a5, a0, -2048
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: li a1, 11
+; ILP32E-WITHFP-NEXT: addi a2, sp, 32
+; ILP32E-WITHFP-NEXT: li a3, 12
+; ILP32E-WITHFP-NEXT: li a4, 13
+; ILP32E-WITHFP-NEXT: sw a6, 32(sp)
+; ILP32E-WITHFP-NEXT: call callee_aligned_stack
+; ILP32E-WITHFP-NEXT: addi sp, s0, -64
+; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 64
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_aligned_stack:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -48
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 56
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 18
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 17
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 24(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 20(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262236
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 655
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 377487
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 1475
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 15
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 14
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262153
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 491
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 44(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 545260
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, -1967
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 40(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 964690
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, -328
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 36(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 335544
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, a0, 1311
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 688509
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, a0, -2048
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 11
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, sp, 32
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 12
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 13
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 32(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_aligned_stack
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -56
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 48
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_aligned_stack:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -48
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 56
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 18
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 17
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 24(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 20(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262236
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 655
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 16(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 377487
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 1475
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 15
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 14
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262153
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 491
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 44(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 545260
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, -1967
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 40(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 964690
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, -328
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 36(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 335544
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, a0, 1311
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 688509
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, a0, -2048
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 11
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, sp, 32
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 12
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 13
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 32(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_aligned_stack
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -56
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 48
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call i32 @callee_aligned_stack(i32 1, i32 11,
+ fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13,
+ i64 20000000000, i32 14, i32 15, double 2.720000e+00, i32 16,
+ [2 x i32] [i32 17, i32 18])
+ ret void
+}
+
+define double @callee_small_scalar_ret() {
+; ILP32E-FPELIM-LABEL: callee_small_scalar_ret:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: lui a1, 261888
+; ILP32E-FPELIM-NEXT: li a0, 0
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_small_scalar_ret:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: lui a1, 261888
+; ILP32E-WITHFP-NEXT: li a0, 0
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_small_scalar_ret:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 261888
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_small_scalar_ret:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 261888
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ ret double 1.0
+}
+
+define i64 @caller_small_scalar_ret() {
+; ILP32E-FPELIM-LABEL: caller_small_scalar_ret:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -4
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: call callee_small_scalar_ret
+; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_small_scalar_ret:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: call callee_small_scalar_ret
+; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 8
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_small_scalar_ret:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_small_scalar_ret
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_small_scalar_ret:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_small_scalar_ret
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call double @callee_small_scalar_ret()
+ %2 = bitcast double %1 to i64
+ ret i64 %2
+}
+
+; Check that on RV32, i64 is passed in a pair of registers. This need not be
+; an aligned pair (under ilp32e, varargs do not use aligned pairs either).
+
+define i32 @callee_i64_in_regs(i32 %a, i64 %b) {
+; ILP32E-FPELIM-LABEL: callee_i64_in_regs:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: add a0, a0, a1
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_i64_in_regs:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: add a0, a0, a1
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_i64_in_regs:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_i64_in_regs:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %b_trunc = trunc i64 %b to i32
+ %1 = add i32 %a, %b_trunc
+ ret i32 %1
+}
+
+define i32 @caller_i64_in_regs() {
+; ILP32E-FPELIM-LABEL: caller_i64_in_regs:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -4
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: li a1, 2
+; ILP32E-FPELIM-NEXT: li a2, 0
+; ILP32E-FPELIM-NEXT: call callee_i64_in_regs
+; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_i64_in_regs:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: li a1, 2
+; ILP32E-WITHFP-NEXT: li a2, 0
+; ILP32E-WITHFP-NEXT: call callee_i64_in_regs
+; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 8
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_i64_in_regs:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_i64_in_regs
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_i64_in_regs:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_i64_in_regs
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call i32 @callee_i64_in_regs(i32 1, i64 2)
+ ret i32 %1
+}
+
+; Check that the stack is used once the GPRs are exhausted
+
+define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i64 %g, i32 %h) {
+; ILP32E-FPELIM-LABEL: callee_many_scalars:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: lw a6, 12(sp)
+; ILP32E-FPELIM-NEXT: lw a7, 0(sp)
+; ILP32E-FPELIM-NEXT: lw t0, 4(sp)
+; ILP32E-FPELIM-NEXT: lw t1, 8(sp)
+; ILP32E-FPELIM-NEXT: andi a0, a0, 255
+; ILP32E-FPELIM-NEXT: slli a1, a1, 16
+; ILP32E-FPELIM-NEXT: srli a1, a1, 16
+; ILP32E-FPELIM-NEXT: add a0, a0, a2
+; ILP32E-FPELIM-NEXT: add a0, a0, a1
+; ILP32E-FPELIM-NEXT: xor a1, a4, t1
+; ILP32E-FPELIM-NEXT: xor a2, a3, t0
+; ILP32E-FPELIM-NEXT: or a1, a2, a1
+; ILP32E-FPELIM-NEXT: seqz a1, a1
+; ILP32E-FPELIM-NEXT: add a0, a0, a5
+; ILP32E-FPELIM-NEXT: add a0, a0, a7
+; ILP32E-FPELIM-NEXT: add a0, a0, a6
+; ILP32E-FPELIM-NEXT: add a0, a1, a0
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_many_scalars:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: lw a6, 12(s0)
+; ILP32E-WITHFP-NEXT: lw a7, 0(s0)
+; ILP32E-WITHFP-NEXT: lw t0, 4(s0)
+; ILP32E-WITHFP-NEXT: lw t1, 8(s0)
+; ILP32E-WITHFP-NEXT: andi a0, a0, 255
+; ILP32E-WITHFP-NEXT: slli a1, a1, 16
+; ILP32E-WITHFP-NEXT: srli a1, a1, 16
+; ILP32E-WITHFP-NEXT: add a0, a0, a2
+; ILP32E-WITHFP-NEXT: add a0, a0, a1
+; ILP32E-WITHFP-NEXT: xor a1, a4, t1
+; ILP32E-WITHFP-NEXT: xor a2, a3, t0
+; ILP32E-WITHFP-NEXT: or a1, a2, a1
+; ILP32E-WITHFP-NEXT: seqz a1, a1
+; ILP32E-WITHFP-NEXT: add a0, a0, a5
+; ILP32E-WITHFP-NEXT: add a0, a0, a7
+; ILP32E-WITHFP-NEXT: add a0, a0, a6
+; ILP32E-WITHFP-NEXT: add a0, a1, a0
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_many_scalars:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw t0, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw t1, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi a0, a0, 255
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: slli a1, a1, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: srli a1, a1, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a4, t1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, t0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a1, a2, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a1, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a5
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a7
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a6
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_many_scalars:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 0(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw t0, 4(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw t1, 8(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi a0, a0, 255
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: slli a1, a1, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: srli a1, a1, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a4, t1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, t0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a1, a2, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a1, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a5
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a7
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a6
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %a_ext = zext i8 %a to i32
+ %b_ext = zext i16 %b to i32
+ %1 = add i32 %a_ext, %b_ext
+ %2 = add i32 %1, %c
+ %3 = icmp eq i64 %d, %g
+ %4 = zext i1 %3 to i32
+ %5 = add i32 %4, %2
+ %6 = add i32 %5, %e
+ %7 = add i32 %6, %f
+ %8 = add i32 %7, %h
+ ret i32 %8
+}
+
+define i32 @caller_many_scalars() {
+; ILP32E-FPELIM-LABEL: caller_many_scalars:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -20
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 20
+; ILP32E-FPELIM-NEXT: sw ra, 16(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: li a0, 8
+; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 8(sp)
+; ILP32E-FPELIM-NEXT: li a0, 7
+; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-NEXT: li a4, 6
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: li a1, 2
+; ILP32E-FPELIM-NEXT: li a2, 3
+; ILP32E-FPELIM-NEXT: li a3, 4
+; ILP32E-FPELIM-NEXT: li a5, 5
+; ILP32E-FPELIM-NEXT: sw a4, 0(sp)
+; ILP32E-FPELIM-NEXT: li a4, 0
+; ILP32E-FPELIM-NEXT: call callee_many_scalars
+; ILP32E-FPELIM-NEXT: lw ra, 16(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 20
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_many_scalars:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -24
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: li a0, 8
+; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 8(sp)
+; ILP32E-WITHFP-NEXT: li a0, 7
+; ILP32E-WITHFP-NEXT: sw a0, 4(sp)
+; ILP32E-WITHFP-NEXT: li a4, 6
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: li a1, 2
+; ILP32E-WITHFP-NEXT: li a2, 3
+; ILP32E-WITHFP-NEXT: li a3, 4
+; ILP32E-WITHFP-NEXT: li a5, 5
+; ILP32E-WITHFP-NEXT: sw a4, 0(sp)
+; ILP32E-WITHFP-NEXT: li a4, 0
+; ILP32E-WITHFP-NEXT: call callee_many_scalars
+; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 24
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_many_scalars:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 20
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 6
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 5
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_many_scalars
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_many_scalars:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 6
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 5
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_many_scalars
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call i32 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i32 5, i32 6, i64 7, i32 8)
+ ret i32 %1
+}
+
+; Check that i128 and fp128 are passed indirectly
+
+define i32 @callee_large_scalars(i128 %a, fp128 %b) {
+; ILP32E-FPELIM-LABEL: callee_large_scalars:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: lw a2, 0(a1)
+; ILP32E-FPELIM-NEXT: lw a3, 0(a0)
+; ILP32E-FPELIM-NEXT: lw a4, 4(a1)
+; ILP32E-FPELIM-NEXT: lw a5, 12(a1)
+; ILP32E-FPELIM-NEXT: lw a6, 12(a0)
+; ILP32E-FPELIM-NEXT: lw a7, 4(a0)
+; ILP32E-FPELIM-NEXT: lw a1, 8(a1)
+; ILP32E-FPELIM-NEXT: lw a0, 8(a0)
+; ILP32E-FPELIM-NEXT: xor a5, a6, a5
+; ILP32E-FPELIM-NEXT: xor a4, a7, a4
+; ILP32E-FPELIM-NEXT: or a4, a4, a5
+; ILP32E-FPELIM-NEXT: xor a0, a0, a1
+; ILP32E-FPELIM-NEXT: xor a2, a3, a2
+; ILP32E-FPELIM-NEXT: or a0, a2, a0
+; ILP32E-FPELIM-NEXT: or a0, a0, a4
+; ILP32E-FPELIM-NEXT: seqz a0, a0
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_large_scalars:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: lw a2, 0(a1)
+; ILP32E-WITHFP-NEXT: lw a3, 0(a0)
+; ILP32E-WITHFP-NEXT: lw a4, 4(a1)
+; ILP32E-WITHFP-NEXT: lw a5, 12(a1)
+; ILP32E-WITHFP-NEXT: lw a6, 12(a0)
+; ILP32E-WITHFP-NEXT: lw a7, 4(a0)
+; ILP32E-WITHFP-NEXT: lw a1, 8(a1)
+; ILP32E-WITHFP-NEXT: lw a0, 8(a0)
+; ILP32E-WITHFP-NEXT: xor a5, a6, a5
+; ILP32E-WITHFP-NEXT: xor a4, a7, a4
+; ILP32E-WITHFP-NEXT: or a4, a4, a5
+; ILP32E-WITHFP-NEXT: xor a0, a0, a1
+; ILP32E-WITHFP-NEXT: xor a2, a3, a2
+; ILP32E-WITHFP-NEXT: or a0, a2, a0
+; ILP32E-WITHFP-NEXT: or a0, a0, a4
+; ILP32E-WITHFP-NEXT: seqz a0, a0
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalars:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 0(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a5, a6, a5
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a4, a4, a5
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, a2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalars:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 0(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a5, a6, a5
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a4, a4, a5
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, a2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %b_bitcast = bitcast fp128 %b to i128
+ %1 = icmp eq i128 %a, %b_bitcast
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @caller_large_scalars() {
+; ILP32E-FPELIM-LABEL: caller_large_scalars:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -48
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 48
+; ILP32E-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-NEXT: addi s0, sp, 48
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-NEXT: andi sp, sp, -16
+; ILP32E-FPELIM-NEXT: lui a0, 524272
+; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 8(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 4(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 0(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 36(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 32(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 28(sp)
+; ILP32E-FPELIM-NEXT: li a2, 1
+; ILP32E-FPELIM-NEXT: addi a0, sp, 24
+; ILP32E-FPELIM-NEXT: mv a1, sp
+; ILP32E-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32E-FPELIM-NEXT: call callee_large_scalars
+; ILP32E-FPELIM-NEXT: addi sp, s0, -48
+; ILP32E-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 48
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_large_scalars:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -48
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 48
+; ILP32E-WITHFP-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 48
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: andi sp, sp, -16
+; ILP32E-WITHFP-NEXT: lui a0, 524272
+; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 8(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 4(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 0(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 36(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 32(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 28(sp)
+; ILP32E-WITHFP-NEXT: li a2, 1
+; ILP32E-WITHFP-NEXT: addi a0, sp, 24
+; ILP32E-WITHFP-NEXT: mv a1, sp
+; ILP32E-WITHFP-NEXT: sw a2, 24(sp)
+; ILP32E-WITHFP-NEXT: call callee_large_scalars
+; ILP32E-WITHFP-NEXT: addi sp, s0, -48
+; ILP32E-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 48
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalars:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -32
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 40
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 28(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, sp
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 16(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -40
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 32
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalars:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -32
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 40
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 28(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, sp
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 16(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -40
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 32
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call i32 @callee_large_scalars(i128 1, fp128 0xL00000000000000007FFF000000000000)
+ ret i32 %1
+}
+
+; Check that arguments larger than 2*xlen are handled correctly when their
+; address is passed on the stack rather than in a register
+
+; Must keep define on a single line due to an update_llc_test_checks.py limitation
+define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i128 %h, i32 %i, fp128 %j) {
+; ILP32E-FPELIM-LABEL: callee_large_scalars_exhausted_regs:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: lw a0, 12(sp)
+; ILP32E-FPELIM-NEXT: lw a1, 4(sp)
+; ILP32E-FPELIM-NEXT: lw a2, 0(a0)
+; ILP32E-FPELIM-NEXT: lw a3, 0(a1)
+; ILP32E-FPELIM-NEXT: lw a4, 4(a0)
+; ILP32E-FPELIM-NEXT: lw a5, 12(a0)
+; ILP32E-FPELIM-NEXT: lw a6, 12(a1)
+; ILP32E-FPELIM-NEXT: lw a7, 4(a1)
+; ILP32E-FPELIM-NEXT: lw a0, 8(a0)
+; ILP32E-FPELIM-NEXT: lw a1, 8(a1)
+; ILP32E-FPELIM-NEXT: xor a5, a6, a5
+; ILP32E-FPELIM-NEXT: xor a4, a7, a4
+; ILP32E-FPELIM-NEXT: or a4, a4, a5
+; ILP32E-FPELIM-NEXT: xor a0, a1, a0
+; ILP32E-FPELIM-NEXT: xor a2, a3, a2
+; ILP32E-FPELIM-NEXT: or a0, a2, a0
+; ILP32E-FPELIM-NEXT: or a0, a0, a4
+; ILP32E-FPELIM-NEXT: seqz a0, a0
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_large_scalars_exhausted_regs:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: lw a0, 12(s0)
+; ILP32E-WITHFP-NEXT: lw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: lw a2, 0(a0)
+; ILP32E-WITHFP-NEXT: lw a3, 0(a1)
+; ILP32E-WITHFP-NEXT: lw a4, 4(a0)
+; ILP32E-WITHFP-NEXT: lw a5, 12(a0)
+; ILP32E-WITHFP-NEXT: lw a6, 12(a1)
+; ILP32E-WITHFP-NEXT: lw a7, 4(a1)
+; ILP32E-WITHFP-NEXT: lw a0, 8(a0)
+; ILP32E-WITHFP-NEXT: lw a1, 8(a1)
+; ILP32E-WITHFP-NEXT: xor a5, a6, a5
+; ILP32E-WITHFP-NEXT: xor a4, a7, a4
+; ILP32E-WITHFP-NEXT: or a4, a4, a5
+; ILP32E-WITHFP-NEXT: xor a0, a1, a0
+; ILP32E-WITHFP-NEXT: xor a2, a3, a2
+; ILP32E-WITHFP-NEXT: or a0, a2, a0
+; ILP32E-WITHFP-NEXT: or a0, a0, a4
+; ILP32E-WITHFP-NEXT: seqz a0, a0
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalars_exhausted_regs:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 0(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a5, a6, a5
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a4, a4, a5
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a1, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, a2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalars_exhausted_regs:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 4(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 0(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a5, a6, a5
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a4, a4, a5
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, a2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %j_bitcast = bitcast fp128 %j to i128
+ %1 = icmp eq i128 %h, %j_bitcast
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @caller_large_scalars_exhausted_regs() {
+; ILP32E-FPELIM-LABEL: caller_large_scalars_exhausted_regs:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -64
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 64
+; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-NEXT: addi s0, sp, 64
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-NEXT: andi sp, sp, -16
+; ILP32E-FPELIM-NEXT: addi a0, sp, 16
+; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
+; ILP32E-FPELIM-NEXT: li a0, 9
+; ILP32E-FPELIM-NEXT: sw a0, 8(sp)
+; ILP32E-FPELIM-NEXT: addi a0, sp, 40
+; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-NEXT: li a0, 7
+; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 524272
+; ILP32E-FPELIM-NEXT: sw a0, 28(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 24(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 20(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 16(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 52(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 48(sp)
+; ILP32E-FPELIM-NEXT: li a0, 8
+; ILP32E-FPELIM-NEXT: sw a0, 40(sp)
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: li a1, 2
+; ILP32E-FPELIM-NEXT: li a2, 3
+; ILP32E-FPELIM-NEXT: li a3, 4
+; ILP32E-FPELIM-NEXT: li a4, 5
+; ILP32E-FPELIM-NEXT: li a5, 6
+; ILP32E-FPELIM-NEXT: sw zero, 44(sp)
+; ILP32E-FPELIM-NEXT: call callee_large_scalars_exhausted_regs
+; ILP32E-FPELIM-NEXT: addi sp, s0, -64
+; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 64
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_large_scalars_exhausted_regs:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -64
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 64
+; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 64
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: andi sp, sp, -16
+; ILP32E-WITHFP-NEXT: addi a0, sp, 16
+; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
+; ILP32E-WITHFP-NEXT: li a0, 9
+; ILP32E-WITHFP-NEXT: sw a0, 8(sp)
+; ILP32E-WITHFP-NEXT: addi a0, sp, 40
+; ILP32E-WITHFP-NEXT: sw a0, 4(sp)
+; ILP32E-WITHFP-NEXT: li a0, 7
+; ILP32E-WITHFP-NEXT: sw a0, 0(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 524272
+; ILP32E-WITHFP-NEXT: sw a0, 28(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 24(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 20(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 16(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 52(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 48(sp)
+; ILP32E-WITHFP-NEXT: li a0, 8
+; ILP32E-WITHFP-NEXT: sw a0, 40(sp)
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: li a1, 2
+; ILP32E-WITHFP-NEXT: li a2, 3
+; ILP32E-WITHFP-NEXT: li a3, 4
+; ILP32E-WITHFP-NEXT: li a4, 5
+; ILP32E-WITHFP-NEXT: li a5, 6
+; ILP32E-WITHFP-NEXT: sw zero, 44(sp)
+; ILP32E-WITHFP-NEXT: call callee_large_scalars_exhausted_regs
+; ILP32E-WITHFP-NEXT: addi sp, s0, -64
+; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 64
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalars_exhausted_regs:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -48
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 56
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 9
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 32
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 16(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 44(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 40(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 32(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 5
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 6
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 36(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -56
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 48
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalars_exhausted_regs:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -48
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 56
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 9
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 32
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 16(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 44(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 40(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 32(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 5
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 6
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 36(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -56
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 48
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call i32 @callee_large_scalars_exhausted_regs(
+ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9,
+ fp128 0xL00000000000000007FFF000000000000)
+ ret i32 %1
+}
+
+; Ensure that libcalls generated in the middle-end obey the calling convention
+
+define i32 @caller_mixed_scalar_libcalls(i64 %a) {
+; ILP32E-FPELIM-LABEL: caller_mixed_scalar_libcalls:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -24
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-NEXT: addi s0, sp, 24
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-NEXT: andi sp, sp, -8
+; ILP32E-FPELIM-NEXT: mv a2, a1
+; ILP32E-FPELIM-NEXT: mv a1, a0
+; ILP32E-FPELIM-NEXT: mv a0, sp
+; ILP32E-FPELIM-NEXT: call __floatditf
+; ILP32E-FPELIM-NEXT: lw a0, 0(sp)
+; ILP32E-FPELIM-NEXT: addi sp, s0, -24
+; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 24
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_mixed_scalar_libcalls:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -24
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: andi sp, sp, -8
+; ILP32E-WITHFP-NEXT: mv a2, a1
+; ILP32E-WITHFP-NEXT: mv a1, a0
+; ILP32E-WITHFP-NEXT: mv a0, sp
+; ILP32E-WITHFP-NEXT: call __floatditf
+; ILP32E-WITHFP-NEXT: lw a0, 0(sp)
+; ILP32E-WITHFP-NEXT: addi sp, s0, -24
+; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 24
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_mixed_scalar_libcalls:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 24
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a2, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call __floatditf
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -24
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_mixed_scalar_libcalls:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a2, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, sp
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call __floatditf
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = sitofp i64 %a to fp128
+ %2 = bitcast fp128 %1 to i128
+ %3 = trunc i128 %2 to i32
+ ret i32 %3
+}
+
+
+; Check passing of coerced integer arrays
+
+%struct.small = type { i32, i32* }
+
+define i32 @callee_small_coerced_struct([2 x i32] %a.coerce) {
+; ILP32E-FPELIM-LABEL: callee_small_coerced_struct:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: xor a0, a0, a1
+; ILP32E-FPELIM-NEXT: seqz a0, a0
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_small_coerced_struct:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: xor a0, a0, a1
+; ILP32E-WITHFP-NEXT: seqz a0, a0
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_small_coerced_struct:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_small_coerced_struct:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = extractvalue [2 x i32] %a.coerce, 0
+ %2 = extractvalue [2 x i32] %a.coerce, 1
+ %3 = icmp eq i32 %1, %2
+ %4 = zext i1 %3 to i32
+ ret i32 %4
+}
+
+define i32 @caller_small_coerced_struct() {
+; ILP32E-FPELIM-LABEL: caller_small_coerced_struct:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -4
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: li a1, 2
+; ILP32E-FPELIM-NEXT: call callee_small_coerced_struct
+; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_small_coerced_struct:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: li a1, 2
+; ILP32E-WITHFP-NEXT: call callee_small_coerced_struct
+; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 8
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_small_coerced_struct:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_small_coerced_struct
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_small_coerced_struct:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_small_coerced_struct
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call i32 @callee_small_coerced_struct([2 x i32] [i32 1, i32 2])
+ ret i32 %1
+}
+
+; Check large struct arguments, which are passed byval
+
+%struct.large = type { i32, i32, i32, i32 }
+
+define i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %a) {
+; ILP32E-FPELIM-LABEL: callee_large_struct:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: lw a1, 0(a0)
+; ILP32E-FPELIM-NEXT: lw a0, 12(a0)
+; ILP32E-FPELIM-NEXT: add a0, a1, a0
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_large_struct:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: lw a1, 0(a0)
+; ILP32E-WITHFP-NEXT: lw a0, 12(a0)
+; ILP32E-WITHFP-NEXT: add a0, a1, a0
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_struct:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 0(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_struct:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 0(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 0
+ %2 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 3
+ %3 = load i32, i32* %1
+ %4 = load i32, i32* %2
+ %5 = add i32 %3, %4
+ ret i32 %5
+}
+
+define i32 @caller_large_struct() {
+; ILP32E-FPELIM-LABEL: caller_large_struct:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -36
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 36
+; ILP32E-FPELIM-NEXT: sw ra, 32(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: sw a0, 16(sp)
+; ILP32E-FPELIM-NEXT: li a1, 2
+; ILP32E-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32E-FPELIM-NEXT: li a2, 3
+; ILP32E-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32E-FPELIM-NEXT: li a3, 4
+; ILP32E-FPELIM-NEXT: sw a3, 28(sp)
+; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 4(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 12(sp)
+; ILP32E-FPELIM-NEXT: mv a0, sp
+; ILP32E-FPELIM-NEXT: call callee_large_struct
+; ILP32E-FPELIM-NEXT: lw ra, 32(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 36
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_large_struct:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -40
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 40
+; ILP32E-WITHFP-NEXT: sw ra, 36(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 32(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 40
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: sw a0, -24(s0)
+; ILP32E-WITHFP-NEXT: li a1, 2
+; ILP32E-WITHFP-NEXT: sw a1, -20(s0)
+; ILP32E-WITHFP-NEXT: li a2, 3
+; ILP32E-WITHFP-NEXT: sw a2, -16(s0)
+; ILP32E-WITHFP-NEXT: li a3, 4
+; ILP32E-WITHFP-NEXT: sw a3, -12(s0)
+; ILP32E-WITHFP-NEXT: sw a0, -40(s0)
+; ILP32E-WITHFP-NEXT: sw a1, -36(s0)
+; ILP32E-WITHFP-NEXT: sw a2, -32(s0)
+; ILP32E-WITHFP-NEXT: sw a3, -28(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, -40
+; ILP32E-WITHFP-NEXT: call callee_large_struct
+; ILP32E-WITHFP-NEXT: lw ra, 36(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 32(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 40
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_struct:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -32
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 36
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 20(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 28(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 12(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_struct
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 32
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_struct:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -32
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 40
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -24(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -20(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, -16(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, -12(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -40(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -36(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, -32(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, -28(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, s0, -40
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_struct
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 32
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %ls = alloca %struct.large, align 4
+ %1 = bitcast %struct.large* %ls to i8*
+ %a = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 0
+ store i32 1, i32* %a
+ %b = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 1
+ store i32 2, i32* %b
+ %c = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 2
+ store i32 3, i32* %c
+ %d = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 3
+ store i32 4, i32* %d
+ %2 = call i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %ls)
+ ret i32 %2
+}
+
+; Check return of 2x xlen structs
+
+define %struct.small @callee_small_struct_ret() {
+; ILP32E-FPELIM-LABEL: callee_small_struct_ret:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: li a1, 0
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_small_struct_ret:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: li a1, 0
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_small_struct_ret:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_small_struct_ret:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ ret %struct.small { i32 1, i32* null }
+}
+
+define i32 @caller_small_struct_ret() {
+; ILP32E-FPELIM-LABEL: caller_small_struct_ret:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -4
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: call callee_small_struct_ret
+; ILP32E-FPELIM-NEXT: add a0, a0, a1
+; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_small_struct_ret:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: call callee_small_struct_ret
+; ILP32E-WITHFP-NEXT: add a0, a0, a1
+; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 8
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_small_struct_ret:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_small_struct_ret
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_small_struct_ret:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_small_struct_ret
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call %struct.small @callee_small_struct_ret()
+ %2 = extractvalue %struct.small %1, 0
+ %3 = extractvalue %struct.small %1, 1
+ %4 = ptrtoint i32* %3 to i32
+ %5 = add i32 %2, %4
+ ret i32 %5
+}
+
+; Check return of >2x xlen scalars
+
+define fp128 @callee_large_scalar_ret() {
+; ILP32E-FPELIM-LABEL: callee_large_scalar_ret:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: lui a1, 524272
+; ILP32E-FPELIM-NEXT: sw a1, 12(a0)
+; ILP32E-FPELIM-NEXT: sw zero, 8(a0)
+; ILP32E-FPELIM-NEXT: sw zero, 4(a0)
+; ILP32E-FPELIM-NEXT: sw zero, 0(a0)
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_large_scalar_ret:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: lui a1, 524272
+; ILP32E-WITHFP-NEXT: sw a1, 12(a0)
+; ILP32E-WITHFP-NEXT: sw zero, 8(a0)
+; ILP32E-WITHFP-NEXT: sw zero, 4(a0)
+; ILP32E-WITHFP-NEXT: sw zero, 0(a0)
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalar_ret:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 524272
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalar_ret:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 524272
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ ret fp128 0xL00000000000000007FFF000000000000
+}
+
+define void @caller_large_scalar_ret() {
+; ILP32E-FPELIM-LABEL: caller_large_scalar_ret:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -32
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 32
+; ILP32E-FPELIM-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-NEXT: addi s0, sp, 32
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-NEXT: andi sp, sp, -16
+; ILP32E-FPELIM-NEXT: mv a0, sp
+; ILP32E-FPELIM-NEXT: call callee_large_scalar_ret
+; ILP32E-FPELIM-NEXT: addi sp, s0, -32
+; ILP32E-FPELIM-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 32
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_large_scalar_ret:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -32
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 32
+; ILP32E-WITHFP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 32
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: andi sp, sp, -16
+; ILP32E-WITHFP-NEXT: mv a0, sp
+; ILP32E-WITHFP-NEXT: call callee_large_scalar_ret
+; ILP32E-WITHFP-NEXT: addi sp, s0, -32
+; ILP32E-WITHFP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 32
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalar_ret:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 24
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalar_ret
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -24
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalar_ret:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, sp
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalar_ret
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = call fp128 @callee_large_scalar_ret()
+ ret void
+}
+
+; Check return of >2x xlen structs
+
+define void @callee_large_struct_ret(%struct.large* noalias sret(%struct.large) %agg.result) {
+; ILP32E-FPELIM-LABEL: callee_large_struct_ret:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: li a1, 1
+; ILP32E-FPELIM-NEXT: sw a1, 0(a0)
+; ILP32E-FPELIM-NEXT: li a1, 2
+; ILP32E-FPELIM-NEXT: sw a1, 4(a0)
+; ILP32E-FPELIM-NEXT: li a1, 3
+; ILP32E-FPELIM-NEXT: sw a1, 8(a0)
+; ILP32E-FPELIM-NEXT: li a1, 4
+; ILP32E-FPELIM-NEXT: sw a1, 12(a0)
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: callee_large_struct_ret:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: li a1, 1
+; ILP32E-WITHFP-NEXT: sw a1, 0(a0)
+; ILP32E-WITHFP-NEXT: li a1, 2
+; ILP32E-WITHFP-NEXT: sw a1, 4(a0)
+; ILP32E-WITHFP-NEXT: li a1, 3
+; ILP32E-WITHFP-NEXT: sw a1, 8(a0)
+; ILP32E-WITHFP-NEXT: li a1, 4
+; ILP32E-WITHFP-NEXT: sw a1, 12(a0)
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_struct_ret:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 0(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 4(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 3
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_struct_ret:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 0(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 4(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 3
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %a = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 0
+ store i32 1, i32* %a, align 4
+ %b = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 1
+ store i32 2, i32* %b, align 4
+ %c = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 2
+ store i32 3, i32* %c, align 4
+ %d = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 3
+ store i32 4, i32* %d, align 4
+ ret void
+}
+
+define i32 @caller_large_struct_ret() {
+; ILP32E-FPELIM-LABEL: caller_large_struct_ret:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -24
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-NEXT: addi s0, sp, 24
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-NEXT: andi sp, sp, -8
+; ILP32E-FPELIM-NEXT: mv a0, sp
+; ILP32E-FPELIM-NEXT: call callee_large_struct_ret
+; ILP32E-FPELIM-NEXT: lw a0, 0(sp)
+; ILP32E-FPELIM-NEXT: lw a1, 12(sp)
+; ILP32E-FPELIM-NEXT: add a0, a0, a1
+; ILP32E-FPELIM-NEXT: addi sp, s0, -24
+; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 24
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller_large_struct_ret:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -24
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: andi sp, sp, -8
+; ILP32E-WITHFP-NEXT: mv a0, sp
+; ILP32E-WITHFP-NEXT: call callee_large_struct_ret
+; ILP32E-WITHFP-NEXT: lw a0, 0(sp)
+; ILP32E-WITHFP-NEXT: lw a1, 12(sp)
+; ILP32E-WITHFP-NEXT: add a0, a0, a1
+; ILP32E-WITHFP-NEXT: addi sp, s0, -24
+; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 24
+; ILP32E-WITHFP-NEXT: ret
+;
+; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_struct_ret:
+; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 24
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_struct_ret
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -24
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+;
+; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_struct_ret:
+; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0:
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, sp
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_struct_ret
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -24
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
+ %1 = alloca %struct.large
+ call void @callee_large_struct_ret(%struct.large* sret(%struct.large) %1)
+ %2 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 0
+ %3 = load i32, i32* %2
+ %4 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 3
+ %5 = load i32, i32* %4
+ %6 = add i32 %3, %5
+ ret i32 %6
+}
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll
new file mode 100644
index 000000000000000..bb2fd5934025122
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-LP64E-FPELIM %s
+; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs -frame-pointer=all < %s \
+; RUN: | FileCheck -check-prefix=RV64I-LP64E-WITHFP %s
+
+; This file contains tests that will have differing output for the lp64e ABIs.
+
+define i64 @callee_float_in_regs(i64 %a, float %b) nounwind {
+; RV64I-LP64E-FPELIM-LABEL: callee_float_in_regs:
+; RV64I-LP64E-FPELIM: # %bb.0:
+; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -16
+; RV64I-LP64E-FPELIM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-FPELIM-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-FPELIM-NEXT: mv s0, a0
+; RV64I-LP64E-FPELIM-NEXT: sext.w a0, a1
+; RV64I-LP64E-FPELIM-NEXT: call __fixsfdi
+; RV64I-LP64E-FPELIM-NEXT: add a0, s0, a0
+; RV64I-LP64E-FPELIM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-FPELIM-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 16
+; RV64I-LP64E-FPELIM-NEXT: ret
+;
+; RV64I-LP64E-WITHFP-LABEL: callee_float_in_regs:
+; RV64I-LP64E-WITHFP: # %bb.0:
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -24
+; RV64I-LP64E-WITHFP-NEXT: sd ra, 16(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: sd s1, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 24
+; RV64I-LP64E-WITHFP-NEXT: mv s1, a0
+; RV64I-LP64E-WITHFP-NEXT: sext.w a0, a1
+; RV64I-LP64E-WITHFP-NEXT: call __fixsfdi
+; RV64I-LP64E-WITHFP-NEXT: add a0, s1, a0
+; RV64I-LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 24
+; RV64I-LP64E-WITHFP-NEXT: ret
+ %b_fptosi = fptosi float %b to i64
+ %1 = add i64 %a, %b_fptosi
+ ret i64 %1
+}
+
+define i64 @caller_float_in_regs() nounwind {
+; RV64I-LP64E-FPELIM-LABEL: caller_float_in_regs:
+; RV64I-LP64E-FPELIM: # %bb.0:
+; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -8
+; RV64I-LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-FPELIM-NEXT: li a0, 1
+; RV64I-LP64E-FPELIM-NEXT: lui a1, 262144
+; RV64I-LP64E-FPELIM-NEXT: call callee_float_in_regs
+; RV64I-LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 8
+; RV64I-LP64E-FPELIM-NEXT: ret
+;
+; RV64I-LP64E-WITHFP-LABEL: caller_float_in_regs:
+; RV64I-LP64E-WITHFP: # %bb.0:
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16
+; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16
+; RV64I-LP64E-WITHFP-NEXT: li a0, 1
+; RV64I-LP64E-WITHFP-NEXT: lui a1, 262144
+; RV64I-LP64E-WITHFP-NEXT: call callee_float_in_regs
+; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16
+; RV64I-LP64E-WITHFP-NEXT: ret
+ %1 = call i64 @callee_float_in_regs(i64 1, float 2.0)
+ ret i64 %1
+}
+
+define i64 @callee_float_on_stack(i128 %a, i128 %b, i128 %c, i128 %d, float %e) nounwind {
+; RV64I-LP64E-FPELIM-LABEL: callee_float_on_stack:
+; RV64I-LP64E-FPELIM: # %bb.0:
+; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -16
+; RV64I-LP64E-FPELIM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-FPELIM-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-FPELIM-NEXT: addi s0, sp, 16
+; RV64I-LP64E-FPELIM-NEXT: andi sp, sp, -16
+; RV64I-LP64E-FPELIM-NEXT: lw a0, 16(s0)
+; RV64I-LP64E-FPELIM-NEXT: addi sp, s0, -16
+; RV64I-LP64E-FPELIM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-FPELIM-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 16
+; RV64I-LP64E-FPELIM-NEXT: ret
+;
+; RV64I-LP64E-WITHFP-LABEL: callee_float_on_stack:
+; RV64I-LP64E-WITHFP: # %bb.0:
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16
+; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16
+; RV64I-LP64E-WITHFP-NEXT: andi sp, sp, -16
+; RV64I-LP64E-WITHFP-NEXT: lw a0, 16(s0)
+; RV64I-LP64E-WITHFP-NEXT: addi sp, s0, -16
+; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16
+; RV64I-LP64E-WITHFP-NEXT: ret
+ %1 = trunc i128 %d to i64
+ %2 = bitcast float %e to i32
+ %3 = sext i32 %2 to i64
+ %4 = add i64 %1, %3
+ ret i64 %3
+}
+
+define i64 @caller_float_on_stack() nounwind {
+; RV64I-LP64E-FPELIM-LABEL: caller_float_on_stack:
+; RV64I-LP64E-FPELIM: # %bb.0:
+; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -48
+; RV64I-LP64E-FPELIM-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-LP64E-FPELIM-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-LP64E-FPELIM-NEXT: addi s0, sp, 48
+; RV64I-LP64E-FPELIM-NEXT: andi sp, sp, -16
+; RV64I-LP64E-FPELIM-NEXT: lui a0, 264704
+; RV64I-LP64E-FPELIM-NEXT: sd a0, 16(sp)
+; RV64I-LP64E-FPELIM-NEXT: sd zero, 8(sp)
+; RV64I-LP64E-FPELIM-NEXT: li a1, 4
+; RV64I-LP64E-FPELIM-NEXT: li a0, 1
+; RV64I-LP64E-FPELIM-NEXT: li a2, 2
+; RV64I-LP64E-FPELIM-NEXT: li a4, 3
+; RV64I-LP64E-FPELIM-NEXT: sd a1, 0(sp)
+; RV64I-LP64E-FPELIM-NEXT: li a1, 0
+; RV64I-LP64E-FPELIM-NEXT: li a3, 0
+; RV64I-LP64E-FPELIM-NEXT: li a5, 0
+; RV64I-LP64E-FPELIM-NEXT: call callee_float_on_stack
+; RV64I-LP64E-FPELIM-NEXT: addi sp, s0, -48
+; RV64I-LP64E-FPELIM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-LP64E-FPELIM-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 48
+; RV64I-LP64E-FPELIM-NEXT: ret
+;
+; RV64I-LP64E-WITHFP-LABEL: caller_float_on_stack:
+; RV64I-LP64E-WITHFP: # %bb.0:
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -48
+; RV64I-LP64E-WITHFP-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 48
+; RV64I-LP64E-WITHFP-NEXT: andi sp, sp, -16
+; RV64I-LP64E-WITHFP-NEXT: lui a0, 264704
+; RV64I-LP64E-WITHFP-NEXT: sd a0, 16(sp)
+; RV64I-LP64E-WITHFP-NEXT: sd zero, 8(sp)
+; RV64I-LP64E-WITHFP-NEXT: li a1, 4
+; RV64I-LP64E-WITHFP-NEXT: li a0, 1
+; RV64I-LP64E-WITHFP-NEXT: li a2, 2
+; RV64I-LP64E-WITHFP-NEXT: li a4, 3
+; RV64I-LP64E-WITHFP-NEXT: sd a1, 0(sp)
+; RV64I-LP64E-WITHFP-NEXT: li a1, 0
+; RV64I-LP64E-WITHFP-NEXT: li a3, 0
+; RV64I-LP64E-WITHFP-NEXT: li a5, 0
+; RV64I-LP64E-WITHFP-NEXT: call callee_float_on_stack
+; RV64I-LP64E-WITHFP-NEXT: addi sp, s0, -48
+; RV64I-LP64E-WITHFP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 48
+; RV64I-LP64E-WITHFP-NEXT: ret
+ %1 = call i64 @callee_float_on_stack(i128 1, i128 2, i128 3, i128 4, float 5.0)
+ ret i64 %1
+}
+
+define float @callee_tiny_scalar_ret() nounwind {
+; RV64I-LP64E-FPELIM-LABEL: callee_tiny_scalar_ret:
+; RV64I-LP64E-FPELIM: # %bb.0:
+; RV64I-LP64E-FPELIM-NEXT: lui a0, 260096
+; RV64I-LP64E-FPELIM-NEXT: ret
+;
+; RV64I-LP64E-WITHFP-LABEL: callee_tiny_scalar_ret:
+; RV64I-LP64E-WITHFP: # %bb.0:
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16
+; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16
+; RV64I-LP64E-WITHFP-NEXT: lui a0, 260096
+; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16
+; RV64I-LP64E-WITHFP-NEXT: ret
+ ret float 1.0
+}
+
+; The sign extension of the float return is necessary, as softened floats are
+; passed anyext.
+
+define i64 @caller_tiny_scalar_ret() nounwind {
+; RV64I-LP64E-FPELIM-LABEL: caller_tiny_scalar_ret:
+; RV64I-LP64E-FPELIM: # %bb.0:
+; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -8
+; RV64I-LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-FPELIM-NEXT: call callee_tiny_scalar_ret
+; RV64I-LP64E-FPELIM-NEXT: sext.w a0, a0
+; RV64I-LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 8
+; RV64I-LP64E-FPELIM-NEXT: ret
+;
+; RV64I-LP64E-WITHFP-LABEL: caller_tiny_scalar_ret:
+; RV64I-LP64E-WITHFP: # %bb.0:
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16
+; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16
+; RV64I-LP64E-WITHFP-NEXT: call callee_tiny_scalar_ret
+; RV64I-LP64E-WITHFP-NEXT: sext.w a0, a0
+; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16
+; RV64I-LP64E-WITHFP-NEXT: ret
+ %1 = call float @callee_tiny_scalar_ret()
+ %2 = bitcast float %1 to i32
+ %3 = sext i32 %2 to i64
+ ret i64 %3
+}
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll
new file mode 100644
index 000000000000000..d3530a4341330d5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IF-ILP32E
+
+; Exercises the ILP32E calling convention code in the case that f32 is a legal
+; type. As well as testing that lowering is correct, these tests also aim to
+; check that floating point load/store or integer load/store is chosen
+; optimally when floats are passed on the stack.
+
+define float @onstack_f32_noop(i64 %a, i64 %b, i64 %c, i64 %d, float %e, float %f) nounwind {
+; RV32IF-ILP32E-LABEL: onstack_f32_noop:
+; RV32IF-ILP32E: # %bb.0:
+; RV32IF-ILP32E-NEXT: lw a0, 12(sp)
+; RV32IF-ILP32E-NEXT: ret
+ ret float %f
+}
+
+define float @onstack_f32_fadd(i64 %a, i64 %b, i64 %c, i64 %d, float %e, float %f) nounwind {
+; RV32IF-ILP32E-LABEL: onstack_f32_fadd:
+; RV32IF-ILP32E: # %bb.0:
+; RV32IF-ILP32E-NEXT: flw fa5, 12(sp)
+; RV32IF-ILP32E-NEXT: flw fa4, 8(sp)
+; RV32IF-ILP32E-NEXT: fadd.s fa5, fa4, fa5
+; RV32IF-ILP32E-NEXT: fmv.x.w a0, fa5
+; RV32IF-ILP32E-NEXT: ret
+ %1 = fadd float %e, %f
+ ret float %1
+}
+
+define float @caller_onstack_f32_noop(float %a) nounwind {
+; RV32IF-ILP32E-LABEL: caller_onstack_f32_noop:
+; RV32IF-ILP32E: # %bb.0:
+; RV32IF-ILP32E-NEXT: addi sp, sp, -20
+; RV32IF-ILP32E-NEXT: sw ra, 16(sp) # 4-byte Folded Spill
+; RV32IF-ILP32E-NEXT: sw a0, 12(sp)
+; RV32IF-ILP32E-NEXT: lui a0, 264704
+; RV32IF-ILP32E-NEXT: sw a0, 8(sp)
+; RV32IF-ILP32E-NEXT: sw zero, 4(sp)
+; RV32IF-ILP32E-NEXT: li a1, 4
+; RV32IF-ILP32E-NEXT: li a0, 1
+; RV32IF-ILP32E-NEXT: li a2, 2
+; RV32IF-ILP32E-NEXT: li a4, 3
+; RV32IF-ILP32E-NEXT: sw a1, 0(sp)
+; RV32IF-ILP32E-NEXT: li a1, 0
+; RV32IF-ILP32E-NEXT: li a3, 0
+; RV32IF-ILP32E-NEXT: li a5, 0
+; RV32IF-ILP32E-NEXT: call onstack_f32_noop
+; RV32IF-ILP32E-NEXT: lw ra, 16(sp) # 4-byte Folded Reload
+; RV32IF-ILP32E-NEXT: addi sp, sp, 20
+; RV32IF-ILP32E-NEXT: ret
+ %1 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float 5.0, float %a)
+ ret float %1
+}
+
+define float @caller_onstack_f32_fadd(float %a, float %b) nounwind {
+; RV32IF-ILP32E-LABEL: caller_onstack_f32_fadd:
+; RV32IF-ILP32E: # %bb.0:
+; RV32IF-ILP32E-NEXT: addi sp, sp, -20
+; RV32IF-ILP32E-NEXT: sw ra, 16(sp) # 4-byte Folded Spill
+; RV32IF-ILP32E-NEXT: fmv.w.x fa5, a1
+; RV32IF-ILP32E-NEXT: fmv.w.x fa4, a0
+; RV32IF-ILP32E-NEXT: fadd.s fa3, fa4, fa5
+; RV32IF-ILP32E-NEXT: fsub.s fa5, fa5, fa4
+; RV32IF-ILP32E-NEXT: sw zero, 4(sp)
+; RV32IF-ILP32E-NEXT: li a0, 4
+; RV32IF-ILP32E-NEXT: sw a0, 0(sp)
+; RV32IF-ILP32E-NEXT: fsw fa5, 12(sp)
+; RV32IF-ILP32E-NEXT: li a0, 1
+; RV32IF-ILP32E-NEXT: li a2, 2
+; RV32IF-ILP32E-NEXT: li a4, 3
+; RV32IF-ILP32E-NEXT: fsw fa3, 8(sp)
+; RV32IF-ILP32E-NEXT: li a1, 0
+; RV32IF-ILP32E-NEXT: li a3, 0
+; RV32IF-ILP32E-NEXT: li a5, 0
+; RV32IF-ILP32E-NEXT: call onstack_f32_noop
+; RV32IF-ILP32E-NEXT: lw ra, 16(sp) # 4-byte Folded Reload
+; RV32IF-ILP32E-NEXT: addi sp, sp, 20
+; RV32IF-ILP32E-NEXT: ret
+ %1 = fadd float %a, %b
+ %2 = fsub float %b, %a
+ %3 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float %1, float %2)
+ ret float %3
+}
diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll
index 5887968042cb008..e22720d6b3e2ba6 100644
--- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll
+++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll
@@ -6,12 +6,28 @@
; RUN: llc -mtriple riscv32-unknown-elf -mattr=+f,+d -o - %s \
; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32-FD
;
+; RUN: llc -mtriple riscv32-unknown-elf -mattr=+i -target-abi ilp32e -o - %s \
+; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV32I-ILP32E
+; RUN: llc -mtriple riscv32-unknown-elf -mattr=+e -o - %s \
+; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV32E
+; RUN: llc -mtriple riscv32-unknown-elf -mattr=+e,+f -o - %s \
+; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV32E-F
+;
; RUN: llc -mtriple riscv64-unknown-elf -o - %s \
; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64
; RUN: llc -mtriple riscv64-unknown-elf -mattr=+f -o - %s \
; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-F
; RUN: llc -mtriple riscv64-unknown-elf -mattr=+f,+d -o - %s \
; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-FD
+;
+; RUN: llc -mtriple riscv64-unknown-elf -mattr=+i -target-abi lp64e -o - %s \
+; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64I-LP64E
+; RUN: llc -mtriple riscv64-unknown-elf -mattr=+e -o - %s \
+; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64E
+; RUN: llc -mtriple riscv64-unknown-elf -mattr=+e,+f -o - %s \
+; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64E-F
+; RUN: llc -mtriple riscv64-unknown-elf -mattr=+e,+f,+d -o - %s \
+; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64E-FD
;
; Checking for special return instructions (sret, mret).
@@ -289,6 +305,183 @@ define void @foo_with_call() #1 {
; CHECK-RV32-FD-NEXT: addi sp, sp, 320
; CHECK-RV32-FD-NEXT: mret
;
+; CHECK-RV32I-ILP32E-LABEL: foo_with_call:
+; CHECK-RV32I-ILP32E: # %bb.0:
+; CHECK-RV32I-ILP32E-NEXT: addi sp, sp, -104
+; CHECK-RV32I-ILP32E-NEXT: sw ra, 100(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t1, 92(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t2, 88(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a2, 76(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a3, 72(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a4, 68(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a5, 64(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a6, 60(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a7, 56(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s2, 52(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s3, 48(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s4, 44(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s5, 40(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s6, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s7, 32(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s8, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s9, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s10, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s11, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t3, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t4, 8(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t5, 4(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t6, 0(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: call otherfoo
+; CHECK-RV32I-ILP32E-NEXT: lw ra, 100(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t0, 96(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t1, 92(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t2, 88(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a2, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a3, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a4, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a5, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a6, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a7, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s2, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s3, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s4, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s5, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s6, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s7, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s8, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s9, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s10, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s11, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t3, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t4, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t5, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t6, 0(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: addi sp, sp, 104
+; CHECK-RV32I-ILP32E-NEXT: mret
+;
+; CHECK-RV32E-LABEL: foo_with_call:
+; CHECK-RV32E: # %bb.0:
+; CHECK-RV32E-NEXT: addi sp, sp, -40
+; CHECK-RV32E-NEXT: sw ra, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw t0, 32(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw t1, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw t2, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a1, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a2, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a3, 8(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a4, 4(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a5, 0(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: call otherfoo
+; CHECK-RV32E-NEXT: lw ra, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw t0, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw t1, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw t2, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a2, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a4, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a5, 0(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: addi sp, sp, 40
+; CHECK-RV32E-NEXT: mret
+;
+; CHECK-RV32E-F-LABEL: foo_with_call:
+; CHECK-RV32E-F: # %bb.0:
+; CHECK-RV32E-F-NEXT: addi sp, sp, -168
+; CHECK-RV32E-F-NEXT: sw ra, 164(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw t0, 160(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw t1, 156(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw t2, 152(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a2, 140(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a3, 136(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a4, 132(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a5, 128(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: call otherfoo
+; CHECK-RV32E-F-NEXT: lw ra, 164(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw t0, 160(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw t1, 156(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw t2, 152(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a2, 140(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a3, 136(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a4, 132(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: addi sp, sp, 168
+; CHECK-RV32E-F-NEXT: mret
+;
; CHECK-RV64-LABEL: foo_with_call:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: addi sp, sp, -128
@@ -533,6 +726,306 @@ define void @foo_with_call() #1 {
; CHECK-RV64-FD-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload
; CHECK-RV64-FD-NEXT: addi sp, sp, 384
; CHECK-RV64-FD-NEXT: mret
+;
+; CHECK-RV64I-LP64E-LABEL: foo_with_call:
+; CHECK-RV64I-LP64E: # %bb.0:
+; CHECK-RV64I-LP64E-NEXT: addi sp, sp, -208
+; CHECK-RV64I-LP64E-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t0, 192(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t1, 184(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t2, 176(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a2, 152(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a3, 144(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a4, 136(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a5, 128(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a6, 120(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a7, 112(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s2, 104(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s3, 96(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s4, 88(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s5, 80(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s6, 72(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s7, 64(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s8, 56(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s9, 48(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s10, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s11, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t3, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t4, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t5, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t6, 0(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: call otherfoo
+; CHECK-RV64I-LP64E-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t0, 192(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t1, 184(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t2, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a2, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a3, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a4, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a5, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a6, 120(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a7, 112(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s2, 104(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s3, 96(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s4, 88(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s5, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s6, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s7, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s8, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s9, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s10, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s11, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t3, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t4, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t5, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t6, 0(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: addi sp, sp, 208
+; CHECK-RV64I-LP64E-NEXT: mret
+;
+; CHECK-RV64E-LABEL: foo_with_call:
+; CHECK-RV64E: # %bb.0:
+; CHECK-RV64E-NEXT: addi sp, sp, -80
+; CHECK-RV64E-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd t0, 64(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd t1, 56(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd t2, 48(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a4, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a5, 0(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: call otherfoo
+; CHECK-RV64E-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld t0, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld t1, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld t2, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a2, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a3, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a4, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a5, 0(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: addi sp, sp, 80
+; CHECK-RV64E-NEXT: mret
+;
+; CHECK-RV64E-F-LABEL: foo_with_call:
+; CHECK-RV64E-F: # %bb.0:
+; CHECK-RV64E-F-NEXT: addi sp, sp, -208
+; CHECK-RV64E-F-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd t0, 192(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd t1, 184(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd t2, 176(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a2, 152(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a3, 144(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a4, 136(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a5, 128(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: call otherfoo
+; CHECK-RV64E-F-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld t0, 192(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld t1, 184(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld t2, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a2, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a3, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a4, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a5, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: addi sp, sp, 208
+; CHECK-RV64E-F-NEXT: mret
+;
+; CHECK-RV64E-FD-LABEL: foo_with_call:
+; CHECK-RV64E-FD: # %bb.0:
+; CHECK-RV64E-FD-NEXT: addi sp, sp, -464
+; CHECK-RV64E-FD-NEXT: sd ra, 456(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t0, 448(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t1, 440(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t2, 432(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a0, 424(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a2, 408(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a3, 400(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a5, 384(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a6, 376(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a7, 368(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s2, 360(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s3, 352(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s4, 344(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s5, 336(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s6, 328(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s7, 320(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s8, 312(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s9, 304(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s10, 296(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s11, 288(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t3, 280(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t4, 272(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t5, 264(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t6, 256(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs0, 184(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs1, 176(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa0, 168(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa1, 160(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa2, 152(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa3, 144(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa4, 136(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa6, 120(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa7, 112(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs2, 104(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs3, 96(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs4, 88(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs5, 80(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs6, 72(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs7, 64(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs8, 56(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs9, 48(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs10, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs11, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft8, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft9, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft10, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft11, 0(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: call otherfoo
+; CHECK-RV64E-FD-NEXT: ld ra, 456(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t0, 448(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t1, 440(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t2, 432(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a0, 424(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a2, 408(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a3, 400(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a4, 392(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a5, 384(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a6, 376(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a7, 368(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s2, 360(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s3, 352(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s4, 344(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s5, 336(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s6, 328(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s7, 320(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s8, 312(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s9, 304(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s10, 296(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s11, 288(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t3, 280(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t4, 272(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t5, 264(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t6, 256(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs0, 184(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs1, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa0, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa1, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa2, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa3, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa4, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa6, 120(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa7, 112(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs2, 104(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs3, 96(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs4, 88(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs5, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs6, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs7, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs8, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs9, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs10, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs11, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft8, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft9, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft10, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: addi sp, sp, 464
+; CHECK-RV64E-FD-NEXT: mret
%call = call i32 @otherfoo()
ret void
}
@@ -796,6 +1289,192 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FD-NEXT: addi sp, sp, 336
; CHECK-RV32-FD-NEXT: mret
;
+; CHECK-RV32I-ILP32E-LABEL: foo_fp_with_call:
+; CHECK-RV32I-ILP32E: # %bb.0:
+; CHECK-RV32I-ILP32E-NEXT: addi sp, sp, -108
+; CHECK-RV32I-ILP32E-NEXT: sw ra, 104(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t1, 96(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t2, 92(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s0, 88(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a2, 76(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a3, 72(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a4, 68(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a5, 64(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a6, 60(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw a7, 56(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s2, 52(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s3, 48(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s4, 44(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s5, 40(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s6, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s7, 32(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s8, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s9, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s10, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw s11, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t3, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t4, 8(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t5, 4(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: sw t6, 0(sp) # 4-byte Folded Spill
+; CHECK-RV32I-ILP32E-NEXT: addi s0, sp, 108
+; CHECK-RV32I-ILP32E-NEXT: call otherfoo
+; CHECK-RV32I-ILP32E-NEXT: lw ra, 104(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t0, 100(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t1, 96(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t2, 92(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s0, 88(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a2, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a3, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a4, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a5, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a6, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw a7, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s2, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s3, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s4, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s5, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s6, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s7, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s8, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s9, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s10, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw s11, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t3, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t4, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t5, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: lw t6, 0(sp) # 4-byte Folded Reload
+; CHECK-RV32I-ILP32E-NEXT: addi sp, sp, 108
+; CHECK-RV32I-ILP32E-NEXT: mret
+;
+; CHECK-RV32E-LABEL: foo_fp_with_call:
+; CHECK-RV32E: # %bb.0:
+; CHECK-RV32E-NEXT: addi sp, sp, -44
+; CHECK-RV32E-NEXT: sw ra, 40(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw t0, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw t1, 32(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw t2, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a1, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a2, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a3, 8(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a4, 4(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: sw a5, 0(sp) # 4-byte Folded Spill
+; CHECK-RV32E-NEXT: addi s0, sp, 44
+; CHECK-RV32E-NEXT: call otherfoo
+; CHECK-RV32E-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw t0, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw t1, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw t2, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a2, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a4, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: lw a5, 0(sp) # 4-byte Folded Reload
+; CHECK-RV32E-NEXT: addi sp, sp, 44
+; CHECK-RV32E-NEXT: mret
+;
+; CHECK-RV32E-F-LABEL: foo_fp_with_call:
+; CHECK-RV32E-F: # %bb.0:
+; CHECK-RV32E-F-NEXT: addi sp, sp, -172
+; CHECK-RV32E-F-NEXT: sw ra, 168(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw t0, 164(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw t1, 160(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw t2, 156(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw s0, 152(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a0, 148(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a2, 140(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a3, 136(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a4, 132(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: sw a5, 128(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill
+; CHECK-RV32E-F-NEXT: addi s0, sp, 172
+; CHECK-RV32E-F-NEXT: call otherfoo
+; CHECK-RV32E-F-NEXT: lw ra, 168(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw t0, 164(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw t1, 160(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw t2, 156(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a2, 140(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a3, 136(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a4, 132(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload
+; CHECK-RV32E-F-NEXT: addi sp, sp, 172
+; CHECK-RV32E-F-NEXT: mret
+;
; CHECK-RV64-LABEL: foo_fp_with_call:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: addi sp, sp, -144
@@ -1049,6 +1728,318 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FD-NEXT: fld ft11, 8(sp) # 8-byte Folded Reload
; CHECK-RV64-FD-NEXT: addi sp, sp, 400
; CHECK-RV64-FD-NEXT: mret
+;
+; CHECK-RV64I-LP64E-LABEL: foo_fp_with_call:
+; CHECK-RV64I-LP64E: # %bb.0:
+; CHECK-RV64I-LP64E-NEXT: addi sp, sp, -216
+; CHECK-RV64I-LP64E-NEXT: sd ra, 208(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t0, 200(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t1, 192(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t2, 184(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a2, 152(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a3, 144(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a4, 136(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a5, 128(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a6, 120(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd a7, 112(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s2, 104(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s3, 96(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s4, 88(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s5, 80(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s6, 72(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s7, 64(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s8, 56(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s9, 48(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s10, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd s11, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t3, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t4, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t5, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: sd t6, 0(sp) # 8-byte Folded Spill
+; CHECK-RV64I-LP64E-NEXT: addi s0, sp, 216
+; CHECK-RV64I-LP64E-NEXT: call otherfoo
+; CHECK-RV64I-LP64E-NEXT: ld ra, 208(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t0, 200(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t1, 192(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t2, 184(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a2, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a3, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a4, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a5, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a6, 120(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld a7, 112(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s2, 104(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s3, 96(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s4, 88(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s5, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s6, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s7, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s8, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s9, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s10, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld s11, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t3, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t4, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t5, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: ld t6, 0(sp) # 8-byte Folded Reload
+; CHECK-RV64I-LP64E-NEXT: addi sp, sp, 216
+; CHECK-RV64I-LP64E-NEXT: mret
+;
+; CHECK-RV64E-LABEL: foo_fp_with_call:
+; CHECK-RV64E: # %bb.0:
+; CHECK-RV64E-NEXT: addi sp, sp, -88
+; CHECK-RV64E-NEXT: sd ra, 80(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd t0, 72(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd t1, 64(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd t2, 56(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a1, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a4, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: sd a5, 0(sp) # 8-byte Folded Spill
+; CHECK-RV64E-NEXT: addi s0, sp, 88
+; CHECK-RV64E-NEXT: call otherfoo
+; CHECK-RV64E-NEXT: ld ra, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld t0, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld t1, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld t2, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a2, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a3, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a4, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: ld a5, 0(sp) # 8-byte Folded Reload
+; CHECK-RV64E-NEXT: addi sp, sp, 88
+; CHECK-RV64E-NEXT: mret
+;
+; CHECK-RV64E-F-LABEL: foo_fp_with_call:
+; CHECK-RV64E-F: # %bb.0:
+; CHECK-RV64E-F-NEXT: addi sp, sp, -216
+; CHECK-RV64E-F-NEXT: sd ra, 208(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd t0, 200(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd t1, 192(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd t2, 184(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a2, 152(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a3, 144(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a4, 136(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: sd a5, 128(sp) # 8-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill
+; CHECK-RV64E-F-NEXT: addi s0, sp, 216
+; CHECK-RV64E-F-NEXT: call otherfoo
+; CHECK-RV64E-F-NEXT: ld ra, 208(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld t0, 200(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld t1, 192(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld t2, 184(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a1, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a2, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a3, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a4, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: ld a5, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload
+; CHECK-RV64E-F-NEXT: addi sp, sp, 216
+; CHECK-RV64E-F-NEXT: mret
+;
+; CHECK-RV64E-FD-LABEL: foo_fp_with_call:
+; CHECK-RV64E-FD: # %bb.0:
+; CHECK-RV64E-FD-NEXT: addi sp, sp, -472
+; CHECK-RV64E-FD-NEXT: sd ra, 464(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t0, 456(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t1, 448(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t2, 440(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s0, 432(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a0, 424(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a1, 416(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a2, 408(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a3, 400(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a4, 392(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a5, 384(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a6, 376(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd a7, 368(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s2, 360(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s3, 352(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s4, 344(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s5, 336(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s6, 328(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s7, 320(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s8, 312(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s9, 304(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s10, 296(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd s11, 288(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t3, 280(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t4, 272(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t5, 264(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: sd t6, 256(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs0, 184(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs1, 176(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa0, 168(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa1, 160(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa2, 152(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa3, 144(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa4, 136(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa6, 120(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fa7, 112(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs2, 104(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs3, 96(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs4, 88(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs5, 80(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs6, 72(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs7, 64(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs8, 56(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs9, 48(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs10, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd fs11, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft8, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft9, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft10, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: fsd ft11, 0(sp) # 8-byte Folded Spill
+; CHECK-RV64E-FD-NEXT: addi s0, sp, 472
+; CHECK-RV64E-FD-NEXT: call otherfoo
+; CHECK-RV64E-FD-NEXT: ld ra, 464(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t0, 456(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t1, 448(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t2, 440(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s0, 432(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a0, 424(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a1, 416(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a2, 408(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a3, 400(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a4, 392(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a5, 384(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a6, 376(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld a7, 368(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s2, 360(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s3, 352(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s4, 344(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s5, 336(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s6, 328(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s7, 320(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s8, 312(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s9, 304(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s10, 296(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld s11, 288(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t3, 280(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t4, 272(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t5, 264(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: ld t6, 256(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs0, 184(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs1, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa0, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa1, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa2, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa3, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa4, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa6, 120(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fa7, 112(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs2, 104(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs3, 96(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs4, 88(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs5, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs6, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs7, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs8, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs9, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs10, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld fs11, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft8, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft9, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft10, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload
+; CHECK-RV64E-FD-NEXT: addi sp, sp, 472
+; CHECK-RV64E-FD-NEXT: mret
%call = call i32 @otherfoo()
ret void
}
diff --git a/llvm/test/CodeGen/RISCV/rv32e.ll b/llvm/test/CodeGen/RISCV/rv32e.ll
new file mode 100644
index 000000000000000..ff73dd216da2297
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32e.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv32 -mattr=+e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+; TODO: Add more tests. @exhausted takes seven i32 arguments; the CHECK lines expect six in a0-a5 and one loaded from 0(sp).
+
+define i32 @exhausted(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) {
+; CHECK-LABEL: exhausted:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw t0, 0(sp)
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a0, a2, a0
+; CHECK-NEXT: add a4, a5, a4
+; CHECK-NEXT: add a0, a4, a0
+; CHECK-NEXT: add a0, t0, a0
+; CHECK-NEXT: ret
+ %1 = add i32 %a, %b
+ %2 = add i32 %c, %1
+ %3 = add i32 %d, %2
+ %4 = add i32 %e, %3
+ %5 = add i32 %f, %4
+ %6 = add i32 %g, %5
+ ret i32 %6
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64e.ll b/llvm/test/CodeGen/RISCV/rv64e.ll
new file mode 100644
index 000000000000000..093d503750abc78
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64e.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv64 -mattr=+e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+; TODO: Add more tests. @exhausted takes seven i64 arguments; the CHECK lines expect six in a0-a5 and one loaded from 0(sp).
+
+define i64 @exhausted(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) {
+; CHECK-LABEL: exhausted:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld t0, 0(sp)
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a0, a2, a0
+; CHECK-NEXT: add a4, a5, a4
+; CHECK-NEXT: add a0, a4, a0
+; CHECK-NEXT: add a0, t0, a0
+; CHECK-NEXT: ret
+ %1 = add i64 %a, %b
+ %2 = add i64 %c, %1
+ %3 = add i64 %d, %2
+ %4 = add i64 %e, %3
+ %5 = add i64 %f, %4
+ %6 = add i64 %g, %5
+ ret i64 %6
+}
diff --git a/llvm/test/CodeGen/RISCV/rve.ll b/llvm/test/CodeGen/RISCV/rve.ll
deleted file mode 100644
index 29b9bab61f7ff83..000000000000000
--- a/llvm/test/CodeGen/RISCV/rve.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: not --crash llc -mtriple=riscv32 -mattr=+e < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -mtriple=riscv64 -mattr=+e < %s 2>&1 | FileCheck %s
-
-; CHECK: LLVM ERROR: Codegen not yet implemented for RVE
-
-define void @nothing() nounwind {
- ret void
-}
diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
index c93153e8f9d1ecb..56723745d012dd2 100644
--- a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
+++ b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I-LP64E
declare void @callee(ptr, ptr)
@@ -34,6 +38,33 @@ define void @caller(i32 %n) {
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -64
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 64
+; RV32I-ILP32E-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: .cfi_offset s0, -8
+; RV32I-ILP32E-NEXT: .cfi_offset s1, -12
+; RV32I-ILP32E-NEXT: addi s0, sp, 64
+; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0
+; RV32I-ILP32E-NEXT: andi sp, sp, -64
+; RV32I-ILP32E-NEXT: mv s1, sp
+; RV32I-ILP32E-NEXT: addi a0, a0, 3
+; RV32I-ILP32E-NEXT: andi a0, a0, -4
+; RV32I-ILP32E-NEXT: sub a0, sp, a0
+; RV32I-ILP32E-NEXT: mv sp, a0
+; RV32I-ILP32E-NEXT: mv a1, s1
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: addi sp, s0, -64
+; RV32I-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 64
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -64
@@ -62,6 +93,35 @@ define void @caller(i32 %n) {
; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -64
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 64
+; RV64I-LP64E-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: .cfi_offset s0, -16
+; RV64I-LP64E-NEXT: .cfi_offset s1, -24
+; RV64I-LP64E-NEXT: addi s0, sp, 64
+; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0
+; RV64I-LP64E-NEXT: andi sp, sp, -64
+; RV64I-LP64E-NEXT: mv s1, sp
+; RV64I-LP64E-NEXT: slli a0, a0, 32
+; RV64I-LP64E-NEXT: srli a0, a0, 32
+; RV64I-LP64E-NEXT: addi a0, a0, 7
+; RV64I-LP64E-NEXT: andi a0, a0, -8
+; RV64I-LP64E-NEXT: sub a0, sp, a0
+; RV64I-LP64E-NEXT: mv sp, a0
+; RV64I-LP64E-NEXT: mv a1, s1
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: addi sp, s0, -64
+; RV64I-LP64E-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 64
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, i32 %n
%2 = alloca i32, align 64
call void @callee(ptr %1, ptr %2)
diff --git a/llvm/test/CodeGen/RISCV/stack-realignment.ll b/llvm/test/CodeGen/RISCV/stack-realignment.ll
index afa8efedbff3e9d..4feb91cace4abec 100644
--- a/llvm/test/CodeGen/RISCV/stack-realignment.ll
+++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll
@@ -1,11 +1,135 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I-LP64E
declare void @callee(ptr)
+define void @caller16() { ; passes a 16-byte-aligned stack slot to @callee
+; RV32I-LABEL: caller16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: mv a0, sp
+; RV32I-NEXT: call callee
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32I-ILP32E-LABEL: caller16:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -16
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 16
+; RV32I-ILP32E-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: .cfi_offset s0, -8
+; RV32I-ILP32E-NEXT: addi s0, sp, 16
+; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0
+; RV32I-ILP32E-NEXT: andi sp, sp, -16
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: addi sp, s0, -16
+; RV32I-ILP32E-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 16
+; RV32I-ILP32E-NEXT: ret
+;
+; RV64I-LABEL: caller16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: mv a0, sp
+; RV64I-NEXT: call callee
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller16:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -32
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 32
+; RV64I-LP64E-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: .cfi_offset s0, -16
+; RV64I-LP64E-NEXT: addi s0, sp, 32
+; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0
+; RV64I-LP64E-NEXT: andi sp, sp, -16
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: addi sp, s0, -32
+; RV64I-LP64E-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 32
+; RV64I-LP64E-NEXT: ret
+ %1 = alloca i8, align 16 ; the expected ilp32e/lp64e output realigns sp (andi sp, sp, -16); the default-ABI output does not
+ call void @callee(ptr %1) ; opaque ptr spelling, matching the declaration of @callee
+ ret void
+}
+
+define void @caller_no_realign16() "no-realign-stack" { ; same body as a realigning caller, but none of the expected outputs adjust sp with andi
+; RV32I-LABEL: caller_no_realign16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: mv a0, sp
+; RV32I-NEXT: call callee
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32I-ILP32E-LABEL: caller_no_realign16:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -8
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8
+; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 8
+; RV32I-ILP32E-NEXT: ret
+;
+; RV64I-LABEL: caller_no_realign16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: mv a0, sp
+; RV64I-NEXT: call callee
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller_no_realign16:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -16
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 16
+; RV64I-LP64E-NEXT: ret
+ %1 = alloca i8, align 16 ; requests 16-byte alignment for the slot handed to @callee
+ call void @callee(ptr %1) ; opaque ptr spelling, matching the declaration of @callee
+ ret void
+}
+
define void @caller32() {
; RV32I-LABEL: caller32:
; RV32I: # %bb.0:
@@ -26,6 +150,25 @@ define void @caller32() {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller32:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -32
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 32
+; RV32I-ILP32E-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: .cfi_offset s0, -8
+; RV32I-ILP32E-NEXT: addi s0, sp, 32
+; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0
+; RV32I-ILP32E-NEXT: andi sp, sp, -32
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: addi sp, s0, -32
+; RV32I-ILP32E-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 32
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller32:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
@@ -44,6 +187,25 @@ define void @caller32() {
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller32:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -32
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 32
+; RV64I-LP64E-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: .cfi_offset s0, -16
+; RV64I-LP64E-NEXT: addi s0, sp, 32
+; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0
+; RV64I-LP64E-NEXT: andi sp, sp, -32
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: addi sp, s0, -32
+; RV64I-LP64E-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 32
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 32
call void @callee(ptr %1)
ret void
@@ -62,6 +224,18 @@ define void @caller_no_realign32() "no-realign-stack" {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller_no_realign32:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -8
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8
+; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 8
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller_no_realign32:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -73,6 +247,18 @@ define void @caller_no_realign32() "no-realign-stack" {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller_no_realign32:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -16
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 16
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 32
call void @callee(ptr %1)
ret void
@@ -98,6 +284,25 @@ define void @caller64() {
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller64:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -64
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 64
+; RV32I-ILP32E-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: .cfi_offset s0, -8
+; RV32I-ILP32E-NEXT: addi s0, sp, 64
+; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0
+; RV32I-ILP32E-NEXT: andi sp, sp, -64
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: addi sp, s0, -64
+; RV32I-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 64
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller64:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -64
@@ -116,6 +321,25 @@ define void @caller64() {
; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller64:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -64
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 64
+; RV64I-LP64E-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: .cfi_offset s0, -16
+; RV64I-LP64E-NEXT: addi s0, sp, 64
+; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0
+; RV64I-LP64E-NEXT: andi sp, sp, -64
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: addi sp, s0, -64
+; RV64I-LP64E-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 64
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 64
call void @callee(ptr %1)
ret void
@@ -134,6 +358,18 @@ define void @caller_no_realign64() "no-realign-stack" {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller_no_realign64:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -8
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8
+; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 8
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller_no_realign64:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -145,6 +381,18 @@ define void @caller_no_realign64() "no-realign-stack" {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller_no_realign64:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -16
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 16
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 64
call void @callee(ptr %1)
ret void
@@ -170,6 +418,25 @@ define void @caller128() {
; RV32I-NEXT: addi sp, sp, 128
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller128:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -128
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 128
+; RV32I-ILP32E-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: .cfi_offset s0, -8
+; RV32I-ILP32E-NEXT: addi s0, sp, 128
+; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0
+; RV32I-ILP32E-NEXT: andi sp, sp, -128
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: addi sp, s0, -128
+; RV32I-ILP32E-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 128
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller128:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -128
@@ -188,6 +455,25 @@ define void @caller128() {
; RV64I-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 128
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller128:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -128
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 128
+; RV64I-LP64E-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: .cfi_offset s0, -16
+; RV64I-LP64E-NEXT: addi s0, sp, 128
+; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0
+; RV64I-LP64E-NEXT: andi sp, sp, -128
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: addi sp, s0, -128
+; RV64I-LP64E-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 128
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 128
call void @callee(ptr %1)
ret void
@@ -206,6 +492,18 @@ define void @caller_no_realign128() "no-realign-stack" {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller_no_realign128:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -8
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8
+; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 8
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller_no_realign128:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -217,6 +515,18 @@ define void @caller_no_realign128() "no-realign-stack" {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller_no_realign128:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -16
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 16
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 128
call void @callee(ptr %1)
ret void
@@ -242,6 +552,25 @@ define void @caller256() {
; RV32I-NEXT: addi sp, sp, 256
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller256:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -256
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 256
+; RV32I-ILP32E-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 248(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: .cfi_offset s0, -8
+; RV32I-ILP32E-NEXT: addi s0, sp, 256
+; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0
+; RV32I-ILP32E-NEXT: andi sp, sp, -256
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: addi sp, s0, -256
+; RV32I-ILP32E-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 248(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 256
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller256:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -256
@@ -260,6 +589,25 @@ define void @caller256() {
; RV64I-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 256
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller256:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -256
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 256
+; RV64I-LP64E-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: .cfi_offset s0, -16
+; RV64I-LP64E-NEXT: addi s0, sp, 256
+; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0
+; RV64I-LP64E-NEXT: andi sp, sp, -256
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: addi sp, s0, -256
+; RV64I-LP64E-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 256
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 256
call void @callee(ptr %1)
ret void
@@ -278,6 +626,18 @@ define void @caller_no_realign256() "no-realign-stack" {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller_no_realign256:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -8
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8
+; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 8
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller_no_realign256:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -289,6 +649,18 @@ define void @caller_no_realign256() "no-realign-stack" {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller_no_realign256:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -16
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 16
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 256
call void @callee(ptr %1)
ret void
@@ -314,6 +686,25 @@ define void @caller512() {
; RV32I-NEXT: addi sp, sp, 1024
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller512:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -1024
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 1024
+; RV32I-ILP32E-NEXT: sw ra, 1020(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 1016(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: .cfi_offset s0, -8
+; RV32I-ILP32E-NEXT: addi s0, sp, 1024
+; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0
+; RV32I-ILP32E-NEXT: andi sp, sp, -512
+; RV32I-ILP32E-NEXT: addi a0, sp, 512
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: addi sp, s0, -1024
+; RV32I-ILP32E-NEXT: lw ra, 1020(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 1016(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 1024
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller512:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -1024
@@ -332,6 +723,25 @@ define void @caller512() {
; RV64I-NEXT: ld s0, 1008(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 1024
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller512:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -1024
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 1024
+; RV64I-LP64E-NEXT: sd ra, 1016(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 1008(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: .cfi_offset s0, -16
+; RV64I-LP64E-NEXT: addi s0, sp, 1024
+; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0
+; RV64I-LP64E-NEXT: andi sp, sp, -512
+; RV64I-LP64E-NEXT: addi a0, sp, 512
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: addi sp, s0, -1024
+; RV64I-LP64E-NEXT: ld ra, 1016(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 1008(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 1024
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 512
call void @callee(ptr %1)
ret void
@@ -350,6 +760,18 @@ define void @caller_no_realign512() "no-realign-stack" {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller_no_realign512:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -8
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8
+; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 8
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller_no_realign512:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -361,6 +783,18 @@ define void @caller_no_realign512() "no-realign-stack" {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller_no_realign512:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -16
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 16
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 512
call void @callee(ptr %1)
ret void
@@ -388,6 +822,27 @@ define void @caller1024() {
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller1024:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -2044
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044
+; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: .cfi_offset s0, -8
+; RV32I-ILP32E-NEXT: addi s0, sp, 2044
+; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0
+; RV32I-ILP32E-NEXT: addi sp, sp, -4
+; RV32I-ILP32E-NEXT: andi sp, sp, -1024
+; RV32I-ILP32E-NEXT: addi a0, sp, 1024
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: addi sp, s0, -2048
+; RV32I-ILP32E-NEXT: addi sp, sp, 4
+; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 2044
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller1024:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -2032
@@ -408,6 +863,27 @@ define void @caller1024() {
; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller1024:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -2040
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 2040
+; RV64I-LP64E-NEXT: sd ra, 2032(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 2024(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: .cfi_offset s0, -16
+; RV64I-LP64E-NEXT: addi s0, sp, 2040
+; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0
+; RV64I-LP64E-NEXT: addi sp, sp, -8
+; RV64I-LP64E-NEXT: andi sp, sp, -1024
+; RV64I-LP64E-NEXT: addi a0, sp, 1024
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: addi sp, s0, -2048
+; RV64I-LP64E-NEXT: addi sp, sp, 8
+; RV64I-LP64E-NEXT: ld ra, 2032(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 2024(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 2040
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 1024
call void @callee(ptr %1)
ret void
@@ -426,6 +902,18 @@ define void @caller_no_realign1024() "no-realign-stack" {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller_no_realign1024:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -8
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8
+; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 8
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller_no_realign1024:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -437,6 +925,18 @@ define void @caller_no_realign1024() "no-realign-stack" {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller_no_realign1024:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -16
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 16
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 1024
call void @callee(ptr %1)
ret void
@@ -468,6 +968,31 @@ define void @caller2048() {
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller2048:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -2044
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044
+; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: .cfi_offset s0, -8
+; RV32I-ILP32E-NEXT: addi s0, sp, 2044
+; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0
+; RV32I-ILP32E-NEXT: addi sp, sp, -2048
+; RV32I-ILP32E-NEXT: addi sp, sp, -4
+; RV32I-ILP32E-NEXT: andi sp, sp, -2048
+; RV32I-ILP32E-NEXT: addi a0, sp, 2047
+; RV32I-ILP32E-NEXT: addi a0, a0, 1
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lui a0, 1
+; RV32I-ILP32E-NEXT: sub sp, s0, a0
+; RV32I-ILP32E-NEXT: addi sp, sp, 2044
+; RV32I-ILP32E-NEXT: addi sp, sp, 8
+; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 2044
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller2048:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -2032
@@ -492,6 +1017,31 @@ define void @caller2048() {
; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller2048:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -2040
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 2040
+; RV64I-LP64E-NEXT: sd ra, 2032(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 2024(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: .cfi_offset s0, -16
+; RV64I-LP64E-NEXT: addi s0, sp, 2040
+; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0
+; RV64I-LP64E-NEXT: addi sp, sp, -2048
+; RV64I-LP64E-NEXT: addi sp, sp, -8
+; RV64I-LP64E-NEXT: andi sp, sp, -2048
+; RV64I-LP64E-NEXT: addi a0, sp, 2047
+; RV64I-LP64E-NEXT: addi a0, a0, 1
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: lui a0, 1
+; RV64I-LP64E-NEXT: sub sp, s0, a0
+; RV64I-LP64E-NEXT: addi sp, sp, 2040
+; RV64I-LP64E-NEXT: addi sp, sp, 16
+; RV64I-LP64E-NEXT: ld ra, 2032(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 2024(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 2040
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 2048
call void @callee(ptr %1)
ret void
@@ -510,6 +1060,18 @@ define void @caller_no_realign2048() "no-realign-stack" {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller_no_realign2048:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -8
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8
+; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 8
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller_no_realign2048:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -521,6 +1083,18 @@ define void @caller_no_realign2048() "no-realign-stack" {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller_no_realign2048:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -16
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 16
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 2048
call void @callee(ptr %1)
ret void
@@ -554,6 +1128,33 @@ define void @caller4096() {
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller4096:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -2044
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044
+; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: .cfi_offset s0, -8
+; RV32I-ILP32E-NEXT: addi s0, sp, 2044
+; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0
+; RV32I-ILP32E-NEXT: lui a0, 2
+; RV32I-ILP32E-NEXT: addi a0, a0, -2044
+; RV32I-ILP32E-NEXT: sub sp, sp, a0
+; RV32I-ILP32E-NEXT: srli a0, sp, 12
+; RV32I-ILP32E-NEXT: slli sp, a0, 12
+; RV32I-ILP32E-NEXT: lui a0, 1
+; RV32I-ILP32E-NEXT: add a0, sp, a0
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lui a0, 2
+; RV32I-ILP32E-NEXT: sub sp, s0, a0
+; RV32I-ILP32E-NEXT: addi a0, a0, -2044
+; RV32I-ILP32E-NEXT: add sp, sp, a0
+; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 2044
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller4096:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -2032
@@ -580,6 +1181,33 @@ define void @caller4096() {
; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller4096:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -2040
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 2040
+; RV64I-LP64E-NEXT: sd ra, 2032(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: sd s0, 2024(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: .cfi_offset s0, -16
+; RV64I-LP64E-NEXT: addi s0, sp, 2040
+; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0
+; RV64I-LP64E-NEXT: lui a0, 2
+; RV64I-LP64E-NEXT: addiw a0, a0, -2040
+; RV64I-LP64E-NEXT: sub sp, sp, a0
+; RV64I-LP64E-NEXT: srli a0, sp, 12
+; RV64I-LP64E-NEXT: slli sp, a0, 12
+; RV64I-LP64E-NEXT: lui a0, 1
+; RV64I-LP64E-NEXT: add a0, sp, a0
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: lui a0, 2
+; RV64I-LP64E-NEXT: sub sp, s0, a0
+; RV64I-LP64E-NEXT: addiw a0, a0, -2040
+; RV64I-LP64E-NEXT: add sp, sp, a0
+; RV64I-LP64E-NEXT: ld ra, 2032(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: ld s0, 2024(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 2040
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 4096
call void @callee(ptr %1)
ret void
@@ -598,6 +1226,18 @@ define void @caller_no_realign4096() "no-realign-stack" {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ILP32E-LABEL: caller_no_realign4096:
+; RV32I-ILP32E: # %bb.0:
+; RV32I-ILP32E-NEXT: addi sp, sp, -8
+; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8
+; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT: .cfi_offset ra, -4
+; RV32I-ILP32E-NEXT: mv a0, sp
+; RV32I-ILP32E-NEXT: call callee
+; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT: addi sp, sp, 8
+; RV32I-ILP32E-NEXT: ret
+;
; RV64I-LABEL: caller_no_realign4096:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -609,6 +1249,18 @@ define void @caller_no_realign4096() "no-realign-stack" {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-LP64E-LABEL: caller_no_realign4096:
+; RV64I-LP64E: # %bb.0:
+; RV64I-LP64E-NEXT: addi sp, sp, -16
+; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT: .cfi_offset ra, -8
+; RV64I-LP64E-NEXT: mv a0, sp
+; RV64I-LP64E-NEXT: call callee
+; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT: addi sp, sp, 16
+; RV64I-LP64E-NEXT: ret
%1 = alloca i8, align 4096
call void @callee(ptr %1)
ret void
diff --git a/llvm/test/CodeGen/RISCV/target-abi-valid.ll b/llvm/test/CodeGen/RISCV/target-abi-valid.ll
index 2d4079601f7bc46..53e4270857332f2 100644
--- a/llvm/test/CodeGen/RISCV/target-abi-valid.ll
+++ b/llvm/test/CodeGen/RISCV/target-abi-valid.ll
@@ -2,6 +2,8 @@
; RUN: | FileCheck -check-prefix=CHECK-IMP %s
; RUN: llc -mtriple=riscv32 -target-abi ilp32 < %s \
; RUN: | FileCheck -check-prefix=CHECK-IMP %s
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \
+; RUN: | FileCheck -check-prefix=CHECK-IMP %s
; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32 < %s \
; RUN: | FileCheck -check-prefix=CHECK-IMP %s
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32 < %s \
@@ -10,6 +12,8 @@
; RUN: | FileCheck -check-prefix=CHECK-IMP %s
; RUN: llc -mtriple=riscv64 -target-abi lp64 < %s \
; RUN: | FileCheck -check-prefix=CHECK-IMP %s
+; RUN: llc -mtriple=riscv64 -target-abi lp64e < %s \
+; RUN: | FileCheck -check-prefix=CHECK-IMP %s
; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64 < %s \
; RUN: | FileCheck -check-prefix=CHECK-IMP %s
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64 < %s \
@@ -33,8 +37,3 @@ define void @nothing() nounwind {
; CHECK-IMP-NEXT: ret
ret void
}
-
-; RUN: not --crash llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \
-; RUN: | FileCheck -check-prefix=CHECK-UNIMP %s
-
-; CHECK-UNIMP: LLVM ERROR: Don't know how to lower this ABI
diff --git a/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll
new file mode 100644
index 000000000000000..c45eb3738e6e2f9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=ILP32E %s
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
+declare void @abort()
+
+define i32 @caller(i32 %a) {
+; ILP32E-LABEL: caller:
+; ILP32E: # %bb.0: # %entry
+; ILP32E-NEXT: addi sp, sp, -8
+; ILP32E-NEXT: .cfi_def_cfa_offset 8
+; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: .cfi_offset ra, -4
+; ILP32E-NEXT: .cfi_offset s0, -8
+; ILP32E-NEXT: mv s0, a0
+; ILP32E-NEXT: li a0, 1
+; ILP32E-NEXT: lui a2, 262144
+; ILP32E-NEXT: li a1, 0
+; ILP32E-NEXT: call va_double
+; ILP32E-NEXT: mv a0, s0
+; ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: addi sp, sp, 8
+; ILP32E-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: caller:
+; ILP32E-WITHFP: # %bb.0: # %entry
+; ILP32E-WITHFP-NEXT: addi sp, sp, -12
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12
+; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
+; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12
+; ILP32E-WITHFP-NEXT: addi s0, sp, 12
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; ILP32E-WITHFP-NEXT: mv s1, a0
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: lui a2, 262144
+; ILP32E-WITHFP-NEXT: li a1, 0
+; ILP32E-WITHFP-NEXT: call va_double
+; ILP32E-WITHFP-NEXT: mv a0, s1
+; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 12
+; ILP32E-WITHFP-NEXT: ret
+entry:
+ call void (i32, ...) @va_double(i32 1, double 2.000000e+00)
+ ret i32 %a
+}
+
+define void @va_double(i32 %n, ...) {
+; ILP32E-LABEL: va_double:
+; ILP32E: # %bb.0: # %entry
+; ILP32E-NEXT: addi sp, sp, -32
+; ILP32E-NEXT: .cfi_def_cfa_offset 32
+; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-NEXT: .cfi_offset ra, -28
+; ILP32E-NEXT: sw a5, 28(sp)
+; ILP32E-NEXT: sw a4, 24(sp)
+; ILP32E-NEXT: sw a3, 20(sp)
+; ILP32E-NEXT: sw a2, 16(sp)
+; ILP32E-NEXT: sw a1, 12(sp)
+; ILP32E-NEXT: addi a0, sp, 12
+; ILP32E-NEXT: sw a0, 0(sp)
+; ILP32E-NEXT: addi a0, sp, 19
+; ILP32E-NEXT: andi a1, a0, -8
+; ILP32E-NEXT: addi a0, a1, 8
+; ILP32E-NEXT: sw a0, 0(sp)
+; ILP32E-NEXT: lw a0, 0(a1)
+; ILP32E-NEXT: lw a1, 4(a1)
+; ILP32E-NEXT: lui a3, 262144
+; ILP32E-NEXT: li a2, 0
+; ILP32E-NEXT: call __eqdf2
+; ILP32E-NEXT: bnez a0, .LBB1_2
+; ILP32E-NEXT: # %bb.1: # %if.end
+; ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-NEXT: addi sp, sp, 32
+; ILP32E-NEXT: ret
+; ILP32E-NEXT: .LBB1_2: # %if.then
+; ILP32E-NEXT: call abort
+;
+; ILP32E-WITHFP-LABEL: va_double:
+; ILP32E-WITHFP: # %bb.0: # %entry
+; ILP32E-WITHFP-NEXT: addi sp, sp, -36
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 36
+; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32
+; ILP32E-WITHFP-NEXT: addi s0, sp, 12
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24
+; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, 4
+; ILP32E-WITHFP-NEXT: sw a0, -12(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, 11
+; ILP32E-WITHFP-NEXT: andi a1, a0, -8
+; ILP32E-WITHFP-NEXT: addi a0, a1, 8
+; ILP32E-WITHFP-NEXT: sw a0, -12(s0)
+; ILP32E-WITHFP-NEXT: lw a0, 0(a1)
+; ILP32E-WITHFP-NEXT: lw a1, 4(a1)
+; ILP32E-WITHFP-NEXT: lui a3, 262144
+; ILP32E-WITHFP-NEXT: li a2, 0
+; ILP32E-WITHFP-NEXT: call __eqdf2
+; ILP32E-WITHFP-NEXT: bnez a0, .LBB1_2
+; ILP32E-WITHFP-NEXT: # %bb.1: # %if.end
+; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 36
+; ILP32E-WITHFP-NEXT: ret
+; ILP32E-WITHFP-NEXT: .LBB1_2: # %if.then
+; ILP32E-WITHFP-NEXT: call abort
+entry:
+ %args = alloca i8*, align 4
+ %args1 = bitcast i8** %args to i8*
+ call void @llvm.va_start(i8* %args1)
+ %argp.cur = load i8*, i8** %args, align 4
+ %0 = ptrtoint i8* %argp.cur to i32
+ %1 = add i32 %0, 7
+ %2 = and i32 %1, -8
+ %argp.cur.aligned = inttoptr i32 %2 to i8*
+ %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 8
+ store i8* %argp.next, i8** %args, align 4
+ %3 = bitcast i8* %argp.cur.aligned to double*
+ %4 = load double, double* %3, align 8
+ %cmp = fcmp une double %4, 2.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ call void @abort()
+ unreachable
+
+if.end:
+ %args2 = bitcast i8** %args to i8*
+ call void @llvm.va_end(i8* %args2)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll
index 8adce4bc466dbe5..14afbae3afbdd66 100644
--- a/llvm/test/CodeGen/RISCV/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/vararg.ll
@@ -11,6 +11,12 @@
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d \
; RUN: -verify-machineinstrs \
; RUN: | FileCheck -check-prefix=RV32D-ILP32-ILP32F-ILP32D-FPELIM %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -target-abi ilp32e \
+; RUN: -verify-machineinstrs \
+; RUN: | FileCheck -check-prefix=ILP32E-FPELIM %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all \
+; RUN: -verify-machineinstrs \
+; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -verify-machineinstrs \
; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d -target-abi lp64f \
@@ -21,6 +27,12 @@
; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -verify-machineinstrs -frame-pointer=all \
; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-WITHFP %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -target-abi lp64e \
+; RUN: -verify-machineinstrs \
+; RUN: | FileCheck -check-prefix=LP64E-FPELIM %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -target-abi lp64e -frame-pointer=all \
+; RUN: -verify-machineinstrs \
+; RUN: | FileCheck -check-prefix=LP64E-WITHFP %s
; The same vararg calling convention is used for ilp32/ilp32f/ilp32d and for
; lp64/lp64f/lp64d. Different CHECK lines are required for RV32D due to slight
@@ -97,6 +109,44 @@ define i32 @va1(ptr %fmt, ...) {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va1:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -32
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 32
+; ILP32E-FPELIM-NEXT: mv a0, a1
+; ILP32E-FPELIM-NEXT: sw a5, 28(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 24(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 20(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 12(sp)
+; ILP32E-FPELIM-NEXT: addi a1, sp, 16
+; ILP32E-FPELIM-NEXT: sw a1, 4(sp)
+; ILP32E-FPELIM-NEXT: addi sp, sp, 32
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va1:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -48
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 48
+; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32
+; ILP32E-WITHFP-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24
+; ILP32E-WITHFP-NEXT: mv a0, a1
+; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: addi a1, s0, 8
+; ILP32E-WITHFP-NEXT: sw a1, -12(s0)
+; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 48
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va1:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80
@@ -138,6 +188,44 @@ define i32 @va1(ptr %fmt, ...) {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va1:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -64
+; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 64
+; LP64E-FPELIM-NEXT: sd a1, 24(sp)
+; LP64E-FPELIM-NEXT: addi a0, sp, 28
+; LP64E-FPELIM-NEXT: sd a0, 8(sp)
+; LP64E-FPELIM-NEXT: lw a0, 24(sp)
+; LP64E-FPELIM-NEXT: sd a5, 56(sp)
+; LP64E-FPELIM-NEXT: sd a4, 48(sp)
+; LP64E-FPELIM-NEXT: sd a3, 40(sp)
+; LP64E-FPELIM-NEXT: sd a2, 32(sp)
+; LP64E-FPELIM-NEXT: addi sp, sp, 64
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va1:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -80
+; LP64E-WITHFP-NEXT: .cfi_def_cfa_offset 80
+; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: .cfi_offset ra, -56
+; LP64E-WITHFP-NEXT: .cfi_offset s0, -64
+; LP64E-WITHFP-NEXT: addi s0, sp, 32
+; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: addi a0, s0, 12
+; LP64E-WITHFP-NEXT: sd a0, -24(s0)
+; LP64E-WITHFP-NEXT: lw a0, 8(s0)
+; LP64E-WITHFP-NEXT: sd a5, 40(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 80
+; LP64E-WITHFP-NEXT: ret
%va = alloca ptr
call void @llvm.va_start(ptr %va)
%argp.cur = load ptr, ptr %va, align 4
@@ -202,6 +290,39 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va1_va_arg:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -32
+; ILP32E-FPELIM-NEXT: mv a0, a1
+; ILP32E-FPELIM-NEXT: sw a5, 28(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 24(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 20(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 12(sp)
+; ILP32E-FPELIM-NEXT: addi a1, sp, 16
+; ILP32E-FPELIM-NEXT: sw a1, 4(sp)
+; ILP32E-FPELIM-NEXT: addi sp, sp, 32
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va1_va_arg:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -48
+; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-NEXT: mv a0, a1
+; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: addi a1, s0, 8
+; ILP32E-WITHFP-NEXT: sw a1, -12(s0)
+; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 48
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80
@@ -238,6 +359,39 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va1_va_arg:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -64
+; LP64E-FPELIM-NEXT: mv a0, a1
+; LP64E-FPELIM-NEXT: sd a5, 56(sp)
+; LP64E-FPELIM-NEXT: sd a4, 48(sp)
+; LP64E-FPELIM-NEXT: sd a3, 40(sp)
+; LP64E-FPELIM-NEXT: sd a2, 32(sp)
+; LP64E-FPELIM-NEXT: sd a1, 24(sp)
+; LP64E-FPELIM-NEXT: addi a1, sp, 32
+; LP64E-FPELIM-NEXT: sd a1, 8(sp)
+; LP64E-FPELIM-NEXT: addi sp, sp, 64
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va1_va_arg:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -80
+; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 32
+; LP64E-WITHFP-NEXT: mv a0, a1
+; LP64E-WITHFP-NEXT: sd a5, 40(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: addi a1, s0, 16
+; LP64E-WITHFP-NEXT: sd a1, -24(s0)
+; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 80
+; LP64E-WITHFP-NEXT: ret
%va = alloca ptr
call void @llvm.va_start(ptr %va)
%1 = va_arg ptr %va, i32
@@ -338,6 +492,62 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va1_va_arg_alloca:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -40
+; ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: addi s0, sp, 16
+; ILP32E-FPELIM-NEXT: mv s1, a1
+; ILP32E-FPELIM-NEXT: sw a5, 20(s0)
+; ILP32E-FPELIM-NEXT: sw a4, 16(s0)
+; ILP32E-FPELIM-NEXT: sw a3, 12(s0)
+; ILP32E-FPELIM-NEXT: sw a2, 8(s0)
+; ILP32E-FPELIM-NEXT: sw a1, 4(s0)
+; ILP32E-FPELIM-NEXT: addi a0, s0, 8
+; ILP32E-FPELIM-NEXT: sw a0, -16(s0)
+; ILP32E-FPELIM-NEXT: addi a0, a1, 3
+; ILP32E-FPELIM-NEXT: andi a0, a0, -4
+; ILP32E-FPELIM-NEXT: sub a0, sp, a0
+; ILP32E-FPELIM-NEXT: mv sp, a0
+; ILP32E-FPELIM-NEXT: call notdead
+; ILP32E-FPELIM-NEXT: mv a0, s1
+; ILP32E-FPELIM-NEXT: addi sp, s0, -16
+; ILP32E-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 40
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va1_va_arg_alloca:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -40
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: mv s1, a1
+; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, 8
+; ILP32E-WITHFP-NEXT: sw a0, -16(s0)
+; ILP32E-WITHFP-NEXT: addi a0, a1, 3
+; ILP32E-WITHFP-NEXT: andi a0, a0, -4
+; ILP32E-WITHFP-NEXT: sub a0, sp, a0
+; ILP32E-WITHFP-NEXT: mv sp, a0
+; ILP32E-WITHFP-NEXT: call notdead
+; ILP32E-WITHFP-NEXT: mv a0, s1
+; ILP32E-WITHFP-NEXT: addi sp, s0, -16
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 40
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg_alloca:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96
@@ -401,6 +611,66 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va1_va_arg_alloca:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -80
+; LP64E-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; LP64E-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; LP64E-FPELIM-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; LP64E-FPELIM-NEXT: addi s0, sp, 32
+; LP64E-FPELIM-NEXT: mv s1, a1
+; LP64E-FPELIM-NEXT: sd a5, 40(s0)
+; LP64E-FPELIM-NEXT: sd a4, 32(s0)
+; LP64E-FPELIM-NEXT: sd a3, 24(s0)
+; LP64E-FPELIM-NEXT: sd a2, 16(s0)
+; LP64E-FPELIM-NEXT: sd a1, 8(s0)
+; LP64E-FPELIM-NEXT: addi a0, s0, 16
+; LP64E-FPELIM-NEXT: sd a0, -32(s0)
+; LP64E-FPELIM-NEXT: slli a0, a1, 32
+; LP64E-FPELIM-NEXT: srli a0, a0, 32
+; LP64E-FPELIM-NEXT: addi a0, a0, 7
+; LP64E-FPELIM-NEXT: andi a0, a0, -8
+; LP64E-FPELIM-NEXT: sub a0, sp, a0
+; LP64E-FPELIM-NEXT: mv sp, a0
+; LP64E-FPELIM-NEXT: call notdead
+; LP64E-FPELIM-NEXT: mv a0, s1
+; LP64E-FPELIM-NEXT: addi sp, s0, -32
+; LP64E-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; LP64E-FPELIM-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; LP64E-FPELIM-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; LP64E-FPELIM-NEXT: addi sp, sp, 80
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va1_va_arg_alloca:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -80
+; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 32
+; LP64E-WITHFP-NEXT: mv s1, a1
+; LP64E-WITHFP-NEXT: sd a5, 40(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: addi a0, s0, 16
+; LP64E-WITHFP-NEXT: sd a0, -32(s0)
+; LP64E-WITHFP-NEXT: slli a0, a1, 32
+; LP64E-WITHFP-NEXT: srli a0, a0, 32
+; LP64E-WITHFP-NEXT: addi a0, a0, 7
+; LP64E-WITHFP-NEXT: andi a0, a0, -8
+; LP64E-WITHFP-NEXT: sub a0, sp, a0
+; LP64E-WITHFP-NEXT: mv sp, a0
+; LP64E-WITHFP-NEXT: call notdead
+; LP64E-WITHFP-NEXT: mv a0, s1
+; LP64E-WITHFP-NEXT: addi sp, s0, -32
+; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 80
+; LP64E-WITHFP-NEXT: ret
%va = alloca ptr
call void @llvm.va_start(ptr %va)
%1 = va_arg ptr %va, i32
@@ -451,6 +721,33 @@ define void @va1_caller() nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va1_caller:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -4
+; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: lui a2, 261888
+; ILP32E-FPELIM-NEXT: li a3, 2
+; ILP32E-FPELIM-NEXT: li a1, 0
+; ILP32E-FPELIM-NEXT: call va1
+; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va1_caller:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8
+; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-NEXT: lui a2, 261888
+; ILP32E-WITHFP-NEXT: li a3, 2
+; ILP32E-WITHFP-NEXT: li a1, 0
+; ILP32E-WITHFP-NEXT: call va1
+; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 8
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va1_caller:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16
@@ -477,6 +774,33 @@ define void @va1_caller() nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va1_caller:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -8
+; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill
+; LP64E-FPELIM-NEXT: li a1, 1023
+; LP64E-FPELIM-NEXT: slli a1, a1, 52
+; LP64E-FPELIM-NEXT: li a2, 2
+; LP64E-FPELIM-NEXT: call va1
+; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload
+; LP64E-FPELIM-NEXT: addi sp, sp, 8
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va1_caller:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -16
+; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 16
+; LP64E-WITHFP-NEXT: li a1, 1023
+; LP64E-WITHFP-NEXT: slli a1, a1, 52
+; LP64E-WITHFP-NEXT: li a2, 2
+; LP64E-WITHFP-NEXT: call va1
+; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 16
+; LP64E-WITHFP-NEXT: ret
%1 = call i32 (ptr, ...) @va1(ptr undef, double 1.0, i32 2)
ret void
}
@@ -553,6 +877,49 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va2:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -32
+; ILP32E-FPELIM-NEXT: sw a5, 28(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 24(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 20(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 12(sp)
+; ILP32E-FPELIM-NEXT: addi a0, sp, 12
+; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-NEXT: addi a0, sp, 19
+; ILP32E-FPELIM-NEXT: andi a0, a0, -8
+; ILP32E-FPELIM-NEXT: addi a1, sp, 27
+; ILP32E-FPELIM-NEXT: sw a1, 4(sp)
+; ILP32E-FPELIM-NEXT: lw a1, 4(a0)
+; ILP32E-FPELIM-NEXT: lw a0, 0(a0)
+; ILP32E-FPELIM-NEXT: addi sp, sp, 32
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va2:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -48
+; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, 4
+; ILP32E-WITHFP-NEXT: sw a0, -12(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, 11
+; ILP32E-WITHFP-NEXT: andi a0, a0, -8
+; ILP32E-WITHFP-NEXT: addi a1, s0, 19
+; ILP32E-WITHFP-NEXT: sw a1, -12(s0)
+; ILP32E-WITHFP-NEXT: lw a1, 4(a0)
+; ILP32E-WITHFP-NEXT: lw a0, 0(a0)
+; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 48
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va2:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80
@@ -589,6 +956,39 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va2:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -64
+; LP64E-FPELIM-NEXT: mv a0, a1
+; LP64E-FPELIM-NEXT: sd a5, 56(sp)
+; LP64E-FPELIM-NEXT: sd a4, 48(sp)
+; LP64E-FPELIM-NEXT: sd a3, 40(sp)
+; LP64E-FPELIM-NEXT: sd a2, 32(sp)
+; LP64E-FPELIM-NEXT: sd a1, 24(sp)
+; LP64E-FPELIM-NEXT: addi a1, sp, 39
+; LP64E-FPELIM-NEXT: sd a1, 8(sp)
+; LP64E-FPELIM-NEXT: addi sp, sp, 64
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va2:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -80
+; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 32
+; LP64E-WITHFP-NEXT: mv a0, a1
+; LP64E-WITHFP-NEXT: sd a5, 40(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: addi a1, s0, 23
+; LP64E-WITHFP-NEXT: sd a1, -24(s0)
+; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 80
+; LP64E-WITHFP-NEXT: ret
%va = alloca ptr
call void @llvm.va_start(ptr %va)
%argp.cur = load ptr, ptr %va
@@ -674,6 +1074,49 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va2_va_arg:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -32
+; ILP32E-FPELIM-NEXT: sw a5, 28(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 24(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 20(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 12(sp)
+; ILP32E-FPELIM-NEXT: addi a0, sp, 19
+; ILP32E-FPELIM-NEXT: andi a1, a0, -8
+; ILP32E-FPELIM-NEXT: addi a0, a1, 4
+; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-NEXT: lw a0, 0(a1)
+; ILP32E-FPELIM-NEXT: addi a2, a1, 8
+; ILP32E-FPELIM-NEXT: sw a2, 4(sp)
+; ILP32E-FPELIM-NEXT: lw a1, 4(a1)
+; ILP32E-FPELIM-NEXT: addi sp, sp, 32
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va2_va_arg:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -48
+; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, 11
+; ILP32E-WITHFP-NEXT: andi a1, a0, -8
+; ILP32E-WITHFP-NEXT: addi a0, a1, 4
+; ILP32E-WITHFP-NEXT: sw a0, -12(s0)
+; ILP32E-WITHFP-NEXT: lw a0, 0(a1)
+; ILP32E-WITHFP-NEXT: addi a2, a1, 8
+; ILP32E-WITHFP-NEXT: sw a2, -12(s0)
+; ILP32E-WITHFP-NEXT: lw a1, 4(a1)
+; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 48
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va2_va_arg:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80
@@ -710,6 +1153,39 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va2_va_arg:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -64
+; LP64E-FPELIM-NEXT: mv a0, a1
+; LP64E-FPELIM-NEXT: sd a5, 56(sp)
+; LP64E-FPELIM-NEXT: sd a4, 48(sp)
+; LP64E-FPELIM-NEXT: sd a3, 40(sp)
+; LP64E-FPELIM-NEXT: sd a2, 32(sp)
+; LP64E-FPELIM-NEXT: sd a1, 24(sp)
+; LP64E-FPELIM-NEXT: addi a1, sp, 32
+; LP64E-FPELIM-NEXT: sd a1, 8(sp)
+; LP64E-FPELIM-NEXT: addi sp, sp, 64
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va2_va_arg:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -80
+; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 32
+; LP64E-WITHFP-NEXT: mv a0, a1
+; LP64E-WITHFP-NEXT: sd a5, 40(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: addi a1, s0, 16
+; LP64E-WITHFP-NEXT: sd a1, -24(s0)
+; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 80
+; LP64E-WITHFP-NEXT: ret
%va = alloca ptr
call void @llvm.va_start(ptr %va)
%1 = va_arg ptr %va, double
@@ -755,6 +1231,31 @@ define void @va2_caller() nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va2_caller:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -4
+; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: lui a2, 261888
+; ILP32E-FPELIM-NEXT: li a1, 0
+; ILP32E-FPELIM-NEXT: call va2
+; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va2_caller:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8
+; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-NEXT: lui a2, 261888
+; ILP32E-WITHFP-NEXT: li a1, 0
+; ILP32E-WITHFP-NEXT: call va2
+; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 8
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va2_caller:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16
@@ -779,6 +1280,31 @@ define void @va2_caller() nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va2_caller:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -8
+; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill
+; LP64E-FPELIM-NEXT: li a1, 1023
+; LP64E-FPELIM-NEXT: slli a1, a1, 52
+; LP64E-FPELIM-NEXT: call va2
+; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload
+; LP64E-FPELIM-NEXT: addi sp, sp, 8
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va2_caller:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -16
+; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 16
+; LP64E-WITHFP-NEXT: li a1, 1023
+; LP64E-WITHFP-NEXT: slli a1, a1, 52
+; LP64E-WITHFP-NEXT: call va2
+; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 16
+; LP64E-WITHFP-NEXT: ret
%1 = call i64 (ptr, ...) @va2(ptr undef, double 1.000000e+00)
ret void
}
@@ -861,6 +1387,53 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 32
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va3:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -32
+; ILP32E-FPELIM-NEXT: sw a5, 28(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 24(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 20(sp)
+; ILP32E-FPELIM-NEXT: addi a0, sp, 20
+; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
+; ILP32E-FPELIM-NEXT: addi a0, sp, 27
+; ILP32E-FPELIM-NEXT: andi a0, a0, -8
+; ILP32E-FPELIM-NEXT: addi a3, sp, 35
+; ILP32E-FPELIM-NEXT: sw a3, 12(sp)
+; ILP32E-FPELIM-NEXT: lw a3, 4(a0)
+; ILP32E-FPELIM-NEXT: lw a0, 0(a0)
+; ILP32E-FPELIM-NEXT: add a2, a2, a3
+; ILP32E-FPELIM-NEXT: add a0, a1, a0
+; ILP32E-FPELIM-NEXT: sltu a1, a0, a1
+; ILP32E-FPELIM-NEXT: add a1, a2, a1
+; ILP32E-FPELIM-NEXT: addi sp, sp, 32
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va3:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -32
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: sw a5, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 4(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, 4
+; ILP32E-WITHFP-NEXT: sw a0, -12(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, 11
+; ILP32E-WITHFP-NEXT: andi a0, a0, -8
+; ILP32E-WITHFP-NEXT: addi a3, s0, 19
+; ILP32E-WITHFP-NEXT: sw a3, -12(s0)
+; ILP32E-WITHFP-NEXT: lw a3, 4(a0)
+; ILP32E-WITHFP-NEXT: lw a0, 0(a0)
+; ILP32E-WITHFP-NEXT: add a2, a2, a3
+; ILP32E-WITHFP-NEXT: add a0, a1, a0
+; ILP32E-WITHFP-NEXT: sltu a1, a0, a1
+; ILP32E-WITHFP-NEXT: add a1, a2, a1
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 32
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va3:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64
@@ -895,6 +1468,37 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va3:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -48
+; LP64E-FPELIM-NEXT: sd a5, 40(sp)
+; LP64E-FPELIM-NEXT: sd a4, 32(sp)
+; LP64E-FPELIM-NEXT: sd a3, 24(sp)
+; LP64E-FPELIM-NEXT: sd a2, 16(sp)
+; LP64E-FPELIM-NEXT: addi a3, sp, 31
+; LP64E-FPELIM-NEXT: add a0, a1, a2
+; LP64E-FPELIM-NEXT: sd a3, 8(sp)
+; LP64E-FPELIM-NEXT: addi sp, sp, 48
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va3:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -64
+; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 32
+; LP64E-WITHFP-NEXT: sd a5, 24(s0)
+; LP64E-WITHFP-NEXT: sd a4, 16(s0)
+; LP64E-WITHFP-NEXT: sd a3, 8(s0)
+; LP64E-WITHFP-NEXT: sd a2, 0(s0)
+; LP64E-WITHFP-NEXT: addi a3, s0, 15
+; LP64E-WITHFP-NEXT: add a0, a1, a2
+; LP64E-WITHFP-NEXT: sd a3, -24(s0)
+; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 64
+; LP64E-WITHFP-NEXT: ret
%va = alloca ptr
call void @llvm.va_start(ptr %va)
%argp.cur = load ptr, ptr %va
@@ -987,6 +1591,53 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va3_va_arg:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -32
+; ILP32E-FPELIM-NEXT: sw a5, 28(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 24(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 20(sp)
+; ILP32E-FPELIM-NEXT: addi a0, sp, 27
+; ILP32E-FPELIM-NEXT: andi a0, a0, -8
+; ILP32E-FPELIM-NEXT: addi a3, a0, 4
+; ILP32E-FPELIM-NEXT: sw a3, 12(sp)
+; ILP32E-FPELIM-NEXT: lw a3, 0(a0)
+; ILP32E-FPELIM-NEXT: addi a4, a0, 8
+; ILP32E-FPELIM-NEXT: sw a4, 12(sp)
+; ILP32E-FPELIM-NEXT: lw a4, 4(a0)
+; ILP32E-FPELIM-NEXT: add a0, a1, a3
+; ILP32E-FPELIM-NEXT: sltu a1, a0, a1
+; ILP32E-FPELIM-NEXT: add a2, a2, a4
+; ILP32E-FPELIM-NEXT: add a1, a2, a1
+; ILP32E-FPELIM-NEXT: addi sp, sp, 32
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va3_va_arg:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -32
+; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 16
+; ILP32E-WITHFP-NEXT: sw a5, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 4(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, 11
+; ILP32E-WITHFP-NEXT: andi a0, a0, -8
+; ILP32E-WITHFP-NEXT: addi a3, a0, 4
+; ILP32E-WITHFP-NEXT: sw a3, -12(s0)
+; ILP32E-WITHFP-NEXT: lw a3, 0(a0)
+; ILP32E-WITHFP-NEXT: addi a4, a0, 8
+; ILP32E-WITHFP-NEXT: sw a4, -12(s0)
+; ILP32E-WITHFP-NEXT: lw a4, 4(a0)
+; ILP32E-WITHFP-NEXT: add a0, a1, a3
+; ILP32E-WITHFP-NEXT: sltu a1, a0, a1
+; ILP32E-WITHFP-NEXT: add a2, a2, a4
+; ILP32E-WITHFP-NEXT: add a1, a2, a1
+; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 32
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va3_va_arg:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64
@@ -1021,6 +1672,37 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va3_va_arg:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -48
+; LP64E-FPELIM-NEXT: sd a5, 40(sp)
+; LP64E-FPELIM-NEXT: sd a4, 32(sp)
+; LP64E-FPELIM-NEXT: sd a3, 24(sp)
+; LP64E-FPELIM-NEXT: sd a2, 16(sp)
+; LP64E-FPELIM-NEXT: addi a3, sp, 24
+; LP64E-FPELIM-NEXT: add a0, a1, a2
+; LP64E-FPELIM-NEXT: sd a3, 8(sp)
+; LP64E-FPELIM-NEXT: addi sp, sp, 48
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va3_va_arg:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -64
+; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 32
+; LP64E-WITHFP-NEXT: sd a5, 24(s0)
+; LP64E-WITHFP-NEXT: sd a4, 16(s0)
+; LP64E-WITHFP-NEXT: sd a3, 8(s0)
+; LP64E-WITHFP-NEXT: sd a2, 0(s0)
+; LP64E-WITHFP-NEXT: addi a3, s0, 8
+; LP64E-WITHFP-NEXT: add a0, a1, a2
+; LP64E-WITHFP-NEXT: sd a3, -24(s0)
+; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 64
+; LP64E-WITHFP-NEXT: ret
%va = alloca ptr
call void @llvm.va_start(ptr %va)
%1 = va_arg ptr %va, double
@@ -1076,6 +1758,37 @@ define void @va3_caller() nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va3_caller:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -4
+; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: li a0, 2
+; ILP32E-FPELIM-NEXT: li a1, 1111
+; ILP32E-FPELIM-NEXT: lui a4, 262144
+; ILP32E-FPELIM-NEXT: li a2, 0
+; ILP32E-FPELIM-NEXT: li a3, 0
+; ILP32E-FPELIM-NEXT: call va3
+; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 4
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va3_caller:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8
+; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8
+; ILP32E-WITHFP-NEXT: li a0, 2
+; ILP32E-WITHFP-NEXT: li a1, 1111
+; ILP32E-WITHFP-NEXT: lui a4, 262144
+; ILP32E-WITHFP-NEXT: li a2, 0
+; ILP32E-WITHFP-NEXT: li a3, 0
+; ILP32E-WITHFP-NEXT: call va3
+; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 8
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va3_caller:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16
@@ -1104,6 +1817,35 @@ define void @va3_caller() nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va3_caller:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -8
+; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill
+; LP64E-FPELIM-NEXT: li a2, 1
+; LP64E-FPELIM-NEXT: slli a2, a2, 62
+; LP64E-FPELIM-NEXT: li a0, 2
+; LP64E-FPELIM-NEXT: li a1, 1111
+; LP64E-FPELIM-NEXT: call va3
+; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload
+; LP64E-FPELIM-NEXT: addi sp, sp, 8
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va3_caller:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -16
+; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 16
+; LP64E-WITHFP-NEXT: li a2, 1
+; LP64E-WITHFP-NEXT: slli a2, a2, 62
+; LP64E-WITHFP-NEXT: li a0, 2
+; LP64E-WITHFP-NEXT: li a1, 1111
+; LP64E-WITHFP-NEXT: call va3
+; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 16
+; LP64E-WITHFP-NEXT: ret
%1 = call i64 (i32, i64, ...) @va3(i32 2, i64 1111, double 2.000000e+00)
ret void
}
@@ -1237,6 +1979,87 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va4_va_copy:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -40
+; ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: mv s0, a1
+; ILP32E-FPELIM-NEXT: sw a5, 36(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 32(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 28(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32E-FPELIM-NEXT: addi a0, sp, 24
+; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
+; ILP32E-FPELIM-NEXT: call notdead
+; ILP32E-FPELIM-NEXT: lw a0, 4(sp)
+; ILP32E-FPELIM-NEXT: addi a0, a0, 3
+; ILP32E-FPELIM-NEXT: andi a0, a0, -4
+; ILP32E-FPELIM-NEXT: addi a1, a0, 4
+; ILP32E-FPELIM-NEXT: sw a1, 4(sp)
+; ILP32E-FPELIM-NEXT: lw a1, 0(a0)
+; ILP32E-FPELIM-NEXT: addi a0, a0, 7
+; ILP32E-FPELIM-NEXT: andi a0, a0, -4
+; ILP32E-FPELIM-NEXT: addi a2, a0, 4
+; ILP32E-FPELIM-NEXT: sw a2, 4(sp)
+; ILP32E-FPELIM-NEXT: lw a2, 0(a0)
+; ILP32E-FPELIM-NEXT: addi a0, a0, 7
+; ILP32E-FPELIM-NEXT: andi a0, a0, -4
+; ILP32E-FPELIM-NEXT: addi a3, a0, 4
+; ILP32E-FPELIM-NEXT: sw a3, 4(sp)
+; ILP32E-FPELIM-NEXT: lw a0, 0(a0)
+; ILP32E-FPELIM-NEXT: add a1, a1, s0
+; ILP32E-FPELIM-NEXT: add a1, a1, a2
+; ILP32E-FPELIM-NEXT: add a0, a1, a0
+; ILP32E-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 40
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va4_va_copy:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -44
+; ILP32E-WITHFP-NEXT: sw ra, 16(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 20
+; ILP32E-WITHFP-NEXT: mv s1, a1
+; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: addi a0, s0, 8
+; ILP32E-WITHFP-NEXT: sw a0, -16(s0)
+; ILP32E-WITHFP-NEXT: sw a0, -20(s0)
+; ILP32E-WITHFP-NEXT: call notdead
+; ILP32E-WITHFP-NEXT: lw a0, -16(s0)
+; ILP32E-WITHFP-NEXT: addi a0, a0, 3
+; ILP32E-WITHFP-NEXT: andi a0, a0, -4
+; ILP32E-WITHFP-NEXT: addi a1, a0, 4
+; ILP32E-WITHFP-NEXT: sw a1, -16(s0)
+; ILP32E-WITHFP-NEXT: lw a1, 0(a0)
+; ILP32E-WITHFP-NEXT: addi a0, a0, 7
+; ILP32E-WITHFP-NEXT: andi a0, a0, -4
+; ILP32E-WITHFP-NEXT: addi a2, a0, 4
+; ILP32E-WITHFP-NEXT: sw a2, -16(s0)
+; ILP32E-WITHFP-NEXT: lw a2, 0(a0)
+; ILP32E-WITHFP-NEXT: addi a0, a0, 7
+; ILP32E-WITHFP-NEXT: andi a0, a0, -4
+; ILP32E-WITHFP-NEXT: addi a3, a0, 4
+; ILP32E-WITHFP-NEXT: sw a3, -16(s0)
+; ILP32E-WITHFP-NEXT: lw a0, 0(a0)
+; ILP32E-WITHFP-NEXT: add a1, a1, s1
+; ILP32E-WITHFP-NEXT: add a1, a1, a2
+; ILP32E-WITHFP-NEXT: add a0, a1, a0
+; ILP32E-WITHFP-NEXT: lw ra, 16(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 44
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va4_va_copy:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96
@@ -1321,6 +2144,87 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 112
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va4_va_copy:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -80
+; LP64E-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; LP64E-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; LP64E-FPELIM-NEXT: mv s0, a1
+; LP64E-FPELIM-NEXT: sd a5, 72(sp)
+; LP64E-FPELIM-NEXT: sd a4, 64(sp)
+; LP64E-FPELIM-NEXT: sd a3, 56(sp)
+; LP64E-FPELIM-NEXT: sd a2, 48(sp)
+; LP64E-FPELIM-NEXT: sd a1, 40(sp)
+; LP64E-FPELIM-NEXT: addi a0, sp, 48
+; LP64E-FPELIM-NEXT: sd a0, 8(sp)
+; LP64E-FPELIM-NEXT: sd a0, 0(sp)
+; LP64E-FPELIM-NEXT: call notdead
+; LP64E-FPELIM-NEXT: ld a0, 8(sp)
+; LP64E-FPELIM-NEXT: addi a0, a0, 3
+; LP64E-FPELIM-NEXT: andi a0, a0, -4
+; LP64E-FPELIM-NEXT: addi a1, a0, 8
+; LP64E-FPELIM-NEXT: sd a1, 8(sp)
+; LP64E-FPELIM-NEXT: ld a1, 0(a0)
+; LP64E-FPELIM-NEXT: addi a0, a0, 11
+; LP64E-FPELIM-NEXT: andi a0, a0, -4
+; LP64E-FPELIM-NEXT: addi a2, a0, 8
+; LP64E-FPELIM-NEXT: sd a2, 8(sp)
+; LP64E-FPELIM-NEXT: ld a2, 0(a0)
+; LP64E-FPELIM-NEXT: addi a0, a0, 11
+; LP64E-FPELIM-NEXT: andi a0, a0, -4
+; LP64E-FPELIM-NEXT: addi a3, a0, 8
+; LP64E-FPELIM-NEXT: sd a3, 8(sp)
+; LP64E-FPELIM-NEXT: ld a0, 0(a0)
+; LP64E-FPELIM-NEXT: add a1, a1, s0
+; LP64E-FPELIM-NEXT: add a1, a1, a2
+; LP64E-FPELIM-NEXT: addw a0, a1, a0
+; LP64E-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; LP64E-FPELIM-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; LP64E-FPELIM-NEXT: addi sp, sp, 80
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va4_va_copy:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -88
+; LP64E-WITHFP-NEXT: sd ra, 32(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 40
+; LP64E-WITHFP-NEXT: mv s1, a1
+; LP64E-WITHFP-NEXT: sd a5, 40(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: addi a0, s0, 16
+; LP64E-WITHFP-NEXT: sd a0, -32(s0)
+; LP64E-WITHFP-NEXT: sd a0, -40(s0)
+; LP64E-WITHFP-NEXT: call notdead
+; LP64E-WITHFP-NEXT: ld a0, -32(s0)
+; LP64E-WITHFP-NEXT: addi a0, a0, 3
+; LP64E-WITHFP-NEXT: andi a0, a0, -4
+; LP64E-WITHFP-NEXT: addi a1, a0, 8
+; LP64E-WITHFP-NEXT: sd a1, -32(s0)
+; LP64E-WITHFP-NEXT: ld a1, 0(a0)
+; LP64E-WITHFP-NEXT: addi a0, a0, 11
+; LP64E-WITHFP-NEXT: andi a0, a0, -4
+; LP64E-WITHFP-NEXT: addi a2, a0, 8
+; LP64E-WITHFP-NEXT: sd a2, -32(s0)
+; LP64E-WITHFP-NEXT: ld a2, 0(a0)
+; LP64E-WITHFP-NEXT: addi a0, a0, 11
+; LP64E-WITHFP-NEXT: andi a0, a0, -4
+; LP64E-WITHFP-NEXT: addi a3, a0, 8
+; LP64E-WITHFP-NEXT: sd a3, -32(s0)
+; LP64E-WITHFP-NEXT: ld a0, 0(a0)
+; LP64E-WITHFP-NEXT: add a1, a1, s1
+; LP64E-WITHFP-NEXT: add a1, a1, a2
+; LP64E-WITHFP-NEXT: addw a0, a1, a0
+; LP64E-WITHFP-NEXT: ld ra, 32(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 88
+; LP64E-WITHFP-NEXT: ret
%vargs = alloca ptr
%wargs = alloca ptr
call void @llvm.va_start(ptr %vargs)
@@ -1478,6 +2382,104 @@ define void @va5_aligned_stack_caller() nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va5_aligned_stack_caller:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -64
+; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; ILP32E-FPELIM-NEXT: addi s0, sp, 64
+; ILP32E-FPELIM-NEXT: andi sp, sp, -16
+; ILP32E-FPELIM-NEXT: li a0, 17
+; ILP32E-FPELIM-NEXT: sw a0, 24(sp)
+; ILP32E-FPELIM-NEXT: li a0, 16
+; ILP32E-FPELIM-NEXT: sw a0, 20(sp)
+; ILP32E-FPELIM-NEXT: li a0, 15
+; ILP32E-FPELIM-NEXT: sw a0, 16(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 262236
+; ILP32E-FPELIM-NEXT: addi a0, a0, 655
+; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 377487
+; ILP32E-FPELIM-NEXT: addi a0, a0, 1475
+; ILP32E-FPELIM-NEXT: sw a0, 8(sp)
+; ILP32E-FPELIM-NEXT: li a0, 14
+; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-NEXT: li a0, 4
+; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 262153
+; ILP32E-FPELIM-NEXT: addi a0, a0, 491
+; ILP32E-FPELIM-NEXT: sw a0, 44(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 545260
+; ILP32E-FPELIM-NEXT: addi a0, a0, -1967
+; ILP32E-FPELIM-NEXT: sw a0, 40(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 964690
+; ILP32E-FPELIM-NEXT: addi a0, a0, -328
+; ILP32E-FPELIM-NEXT: sw a0, 36(sp)
+; ILP32E-FPELIM-NEXT: lui a0, 335544
+; ILP32E-FPELIM-NEXT: addi a6, a0, 1311
+; ILP32E-FPELIM-NEXT: lui a0, 688509
+; ILP32E-FPELIM-NEXT: addi a5, a0, -2048
+; ILP32E-FPELIM-NEXT: li a0, 1
+; ILP32E-FPELIM-NEXT: li a1, 11
+; ILP32E-FPELIM-NEXT: addi a2, sp, 32
+; ILP32E-FPELIM-NEXT: li a3, 12
+; ILP32E-FPELIM-NEXT: li a4, 13
+; ILP32E-FPELIM-NEXT: sw a6, 32(sp)
+; ILP32E-FPELIM-NEXT: call va5_aligned_stack_callee
+; ILP32E-FPELIM-NEXT: addi sp, s0, -64
+; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; ILP32E-FPELIM-NEXT: addi sp, sp, 64
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va5_aligned_stack_caller:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -64
+; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 64
+; ILP32E-WITHFP-NEXT: andi sp, sp, -16
+; ILP32E-WITHFP-NEXT: li a0, 17
+; ILP32E-WITHFP-NEXT: sw a0, 24(sp)
+; ILP32E-WITHFP-NEXT: li a0, 16
+; ILP32E-WITHFP-NEXT: sw a0, 20(sp)
+; ILP32E-WITHFP-NEXT: li a0, 15
+; ILP32E-WITHFP-NEXT: sw a0, 16(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 262236
+; ILP32E-WITHFP-NEXT: addi a0, a0, 655
+; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 377487
+; ILP32E-WITHFP-NEXT: addi a0, a0, 1475
+; ILP32E-WITHFP-NEXT: sw a0, 8(sp)
+; ILP32E-WITHFP-NEXT: li a0, 14
+; ILP32E-WITHFP-NEXT: sw a0, 4(sp)
+; ILP32E-WITHFP-NEXT: li a0, 4
+; ILP32E-WITHFP-NEXT: sw a0, 0(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 262153
+; ILP32E-WITHFP-NEXT: addi a0, a0, 491
+; ILP32E-WITHFP-NEXT: sw a0, 44(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 545260
+; ILP32E-WITHFP-NEXT: addi a0, a0, -1967
+; ILP32E-WITHFP-NEXT: sw a0, 40(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 964690
+; ILP32E-WITHFP-NEXT: addi a0, a0, -328
+; ILP32E-WITHFP-NEXT: sw a0, 36(sp)
+; ILP32E-WITHFP-NEXT: lui a0, 335544
+; ILP32E-WITHFP-NEXT: addi a6, a0, 1311
+; ILP32E-WITHFP-NEXT: lui a0, 688509
+; ILP32E-WITHFP-NEXT: addi a5, a0, -2048
+; ILP32E-WITHFP-NEXT: li a0, 1
+; ILP32E-WITHFP-NEXT: li a1, 11
+; ILP32E-WITHFP-NEXT: addi a2, sp, 32
+; ILP32E-WITHFP-NEXT: li a3, 12
+; ILP32E-WITHFP-NEXT: li a4, 13
+; ILP32E-WITHFP-NEXT: sw a6, 32(sp)
+; ILP32E-WITHFP-NEXT: call va5_aligned_stack_callee
+; ILP32E-WITHFP-NEXT: addi sp, s0, -64
+; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 64
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va5_aligned_stack_caller:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -48
@@ -1540,6 +2542,73 @@ define void @va5_aligned_stack_caller() nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 48
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va5_aligned_stack_caller:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -56
+; LP64E-FPELIM-NEXT: sd ra, 48(sp) # 8-byte Folded Spill
+; LP64E-FPELIM-NEXT: li a0, 17
+; LP64E-FPELIM-NEXT: sd a0, 40(sp)
+; LP64E-FPELIM-NEXT: li a0, 16
+; LP64E-FPELIM-NEXT: lui a1, %hi(.LCPI11_0)
+; LP64E-FPELIM-NEXT: ld a1, %lo(.LCPI11_0)(a1)
+; LP64E-FPELIM-NEXT: sd a0, 32(sp)
+; LP64E-FPELIM-NEXT: li a0, 15
+; LP64E-FPELIM-NEXT: sd a0, 24(sp)
+; LP64E-FPELIM-NEXT: sd a1, 16(sp)
+; LP64E-FPELIM-NEXT: li a0, 14
+; LP64E-FPELIM-NEXT: sd a0, 8(sp)
+; LP64E-FPELIM-NEXT: lui a0, 2384
+; LP64E-FPELIM-NEXT: addiw a0, a0, 761
+; LP64E-FPELIM-NEXT: slli a6, a0, 11
+; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_1)
+; LP64E-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0)
+; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_2)
+; LP64E-FPELIM-NEXT: ld a3, %lo(.LCPI11_2)(a0)
+; LP64E-FPELIM-NEXT: li a0, 1
+; LP64E-FPELIM-NEXT: li a1, 11
+; LP64E-FPELIM-NEXT: li a4, 12
+; LP64E-FPELIM-NEXT: li a5, 13
+; LP64E-FPELIM-NEXT: sd a6, 0(sp)
+; LP64E-FPELIM-NEXT: call va5_aligned_stack_callee
+; LP64E-FPELIM-NEXT: ld ra, 48(sp) # 8-byte Folded Reload
+; LP64E-FPELIM-NEXT: addi sp, sp, 56
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va5_aligned_stack_caller:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -64
+; LP64E-WITHFP-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 64
+; LP64E-WITHFP-NEXT: li a0, 17
+; LP64E-WITHFP-NEXT: sd a0, 40(sp)
+; LP64E-WITHFP-NEXT: li a0, 16
+; LP64E-WITHFP-NEXT: lui a1, %hi(.LCPI11_0)
+; LP64E-WITHFP-NEXT: ld a1, %lo(.LCPI11_0)(a1)
+; LP64E-WITHFP-NEXT: sd a0, 32(sp)
+; LP64E-WITHFP-NEXT: li a0, 15
+; LP64E-WITHFP-NEXT: sd a0, 24(sp)
+; LP64E-WITHFP-NEXT: sd a1, 16(sp)
+; LP64E-WITHFP-NEXT: li a0, 14
+; LP64E-WITHFP-NEXT: sd a0, 8(sp)
+; LP64E-WITHFP-NEXT: lui a0, 2384
+; LP64E-WITHFP-NEXT: addiw a0, a0, 761
+; LP64E-WITHFP-NEXT: slli a6, a0, 11
+; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_1)
+; LP64E-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0)
+; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_2)
+; LP64E-WITHFP-NEXT: ld a3, %lo(.LCPI11_2)(a0)
+; LP64E-WITHFP-NEXT: li a0, 1
+; LP64E-WITHFP-NEXT: li a1, 11
+; LP64E-WITHFP-NEXT: li a4, 12
+; LP64E-WITHFP-NEXT: li a5, 13
+; LP64E-WITHFP-NEXT: sd a6, 0(sp)
+; LP64E-WITHFP-NEXT: call va5_aligned_stack_callee
+; LP64E-WITHFP-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 64
+; LP64E-WITHFP-NEXT: ret
%1 = call i32 (i32, ...) @va5_aligned_stack_callee(i32 1, i32 11,
fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, i64 20000000000,
i32 14, double 2.720000e+00, i32 15, [2 x i32] [i32 16, i32 17])
@@ -1604,6 +2673,39 @@ define i32 @va6_no_fixed_args(...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va6_no_fixed_args:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: addi sp, sp, -32
+; ILP32E-FPELIM-NEXT: sw a5, 28(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 24(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 20(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 12(sp)
+; ILP32E-FPELIM-NEXT: sw a0, 8(sp)
+; ILP32E-FPELIM-NEXT: addi a1, sp, 12
+; ILP32E-FPELIM-NEXT: sw a1, 4(sp)
+; ILP32E-FPELIM-NEXT: addi sp, sp, 32
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va6_no_fixed_args:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -48
+; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: addi s0, sp, 24
+; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a0, 0(s0)
+; ILP32E-WITHFP-NEXT: addi a1, s0, 4
+; ILP32E-WITHFP-NEXT: sw a1, -12(s0)
+; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 48
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va6_no_fixed_args:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80
@@ -1640,6 +2742,39 @@ define i32 @va6_no_fixed_args(...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va6_no_fixed_args:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: addi sp, sp, -64
+; LP64E-FPELIM-NEXT: sd a5, 56(sp)
+; LP64E-FPELIM-NEXT: sd a4, 48(sp)
+; LP64E-FPELIM-NEXT: sd a3, 40(sp)
+; LP64E-FPELIM-NEXT: sd a2, 32(sp)
+; LP64E-FPELIM-NEXT: sd a1, 24(sp)
+; LP64E-FPELIM-NEXT: sd a0, 16(sp)
+; LP64E-FPELIM-NEXT: addi a1, sp, 24
+; LP64E-FPELIM-NEXT: sd a1, 8(sp)
+; LP64E-FPELIM-NEXT: addi sp, sp, 64
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va6_no_fixed_args:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -80
+; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: addi s0, sp, 32
+; LP64E-WITHFP-NEXT: sd a5, 40(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: sd a0, 0(s0)
+; LP64E-WITHFP-NEXT: addi a1, s0, 8
+; LP64E-WITHFP-NEXT: sd a1, -24(s0)
+; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 80
+; LP64E-WITHFP-NEXT: ret
%va = alloca ptr
call void @llvm.va_start(ptr %va)
%1 = va_arg ptr %va, i32
@@ -1757,6 +2892,68 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add sp, sp, a1
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
+; ILP32E-FPELIM-LABEL: va_large_stack:
+; ILP32E-FPELIM: # %bb.0:
+; ILP32E-FPELIM-NEXT: lui a0, 24414
+; ILP32E-FPELIM-NEXT: addi a0, a0, 288
+; ILP32E-FPELIM-NEXT: sub sp, sp, a0
+; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 100000032
+; ILP32E-FPELIM-NEXT: mv a0, a1
+; ILP32E-FPELIM-NEXT: lui a6, 24414
+; ILP32E-FPELIM-NEXT: add a6, sp, a6
+; ILP32E-FPELIM-NEXT: sw a5, 284(a6)
+; ILP32E-FPELIM-NEXT: lui a5, 24414
+; ILP32E-FPELIM-NEXT: add a5, sp, a5
+; ILP32E-FPELIM-NEXT: sw a4, 280(a5)
+; ILP32E-FPELIM-NEXT: lui a4, 24414
+; ILP32E-FPELIM-NEXT: add a4, sp, a4
+; ILP32E-FPELIM-NEXT: sw a3, 276(a4)
+; ILP32E-FPELIM-NEXT: lui a3, 24414
+; ILP32E-FPELIM-NEXT: add a3, sp, a3
+; ILP32E-FPELIM-NEXT: sw a2, 272(a3)
+; ILP32E-FPELIM-NEXT: lui a2, 24414
+; ILP32E-FPELIM-NEXT: add a2, sp, a2
+; ILP32E-FPELIM-NEXT: sw a1, 268(a2)
+; ILP32E-FPELIM-NEXT: lui a1, 24414
+; ILP32E-FPELIM-NEXT: addi a1, a1, 272
+; ILP32E-FPELIM-NEXT: add a1, sp, a1
+; ILP32E-FPELIM-NEXT: sw a1, 4(sp)
+; ILP32E-FPELIM-NEXT: lui a1, 24414
+; ILP32E-FPELIM-NEXT: addi a1, a1, 288
+; ILP32E-FPELIM-NEXT: add sp, sp, a1
+; ILP32E-FPELIM-NEXT: ret
+;
+; ILP32E-WITHFP-LABEL: va_large_stack:
+; ILP32E-WITHFP: # %bb.0:
+; ILP32E-WITHFP-NEXT: addi sp, sp, -2044
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 2044
+; ILP32E-WITHFP-NEXT: sw ra, 2016(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: sw s0, 2012(sp) # 4-byte Folded Spill
+; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28
+; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32
+; ILP32E-WITHFP-NEXT: addi s0, sp, 2020
+; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24
+; ILP32E-WITHFP-NEXT: lui a0, 24414
+; ILP32E-WITHFP-NEXT: addi a0, a0, -1740
+; ILP32E-WITHFP-NEXT: sub sp, sp, a0
+; ILP32E-WITHFP-NEXT: mv a0, a1
+; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: addi a1, s0, 8
+; ILP32E-WITHFP-NEXT: lui a2, 24414
+; ILP32E-WITHFP-NEXT: sub a2, s0, a2
+; ILP32E-WITHFP-NEXT: sw a1, -272(a2)
+; ILP32E-WITHFP-NEXT: lui a1, 24414
+; ILP32E-WITHFP-NEXT: addi a1, a1, -1740
+; ILP32E-WITHFP-NEXT: add sp, sp, a1
+; ILP32E-WITHFP-NEXT: lw ra, 2016(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: lw s0, 2012(sp) # 4-byte Folded Reload
+; ILP32E-WITHFP-NEXT: addi sp, sp, 2044
+; ILP32E-WITHFP-NEXT: ret
+;
; LP64-LP64F-LP64D-FPELIM-LABEL: va_large_stack:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414
@@ -1828,6 +3025,70 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 1952(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 2032
; LP64-LP64F-LP64D-WITHFP-NEXT: ret
+;
+; LP64E-FPELIM-LABEL: va_large_stack:
+; LP64E-FPELIM: # %bb.0:
+; LP64E-FPELIM-NEXT: lui a0, 24414
+; LP64E-FPELIM-NEXT: addiw a0, a0, 320
+; LP64E-FPELIM-NEXT: sub sp, sp, a0
+; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 100000064
+; LP64E-FPELIM-NEXT: lui a0, 24414
+; LP64E-FPELIM-NEXT: add a0, sp, a0
+; LP64E-FPELIM-NEXT: sd a1, 280(a0)
+; LP64E-FPELIM-NEXT: lui a0, 24414
+; LP64E-FPELIM-NEXT: addiw a0, a0, 284
+; LP64E-FPELIM-NEXT: add a0, sp, a0
+; LP64E-FPELIM-NEXT: sd a0, 8(sp)
+; LP64E-FPELIM-NEXT: lui a0, 24414
+; LP64E-FPELIM-NEXT: add a0, sp, a0
+; LP64E-FPELIM-NEXT: lw a0, 280(a0)
+; LP64E-FPELIM-NEXT: lui a1, 24414
+; LP64E-FPELIM-NEXT: add a1, sp, a1
+; LP64E-FPELIM-NEXT: sd a5, 312(a1)
+; LP64E-FPELIM-NEXT: lui a1, 24414
+; LP64E-FPELIM-NEXT: add a1, sp, a1
+; LP64E-FPELIM-NEXT: sd a4, 304(a1)
+; LP64E-FPELIM-NEXT: lui a1, 24414
+; LP64E-FPELIM-NEXT: add a1, sp, a1
+; LP64E-FPELIM-NEXT: sd a3, 296(a1)
+; LP64E-FPELIM-NEXT: lui a1, 24414
+; LP64E-FPELIM-NEXT: add a1, sp, a1
+; LP64E-FPELIM-NEXT: sd a2, 288(a1)
+; LP64E-FPELIM-NEXT: lui a1, 24414
+; LP64E-FPELIM-NEXT: addiw a1, a1, 320
+; LP64E-FPELIM-NEXT: add sp, sp, a1
+; LP64E-FPELIM-NEXT: ret
+;
+; LP64E-WITHFP-LABEL: va_large_stack:
+; LP64E-WITHFP: # %bb.0:
+; LP64E-WITHFP-NEXT: addi sp, sp, -2040
+; LP64E-WITHFP-NEXT: .cfi_def_cfa_offset 2040
+; LP64E-WITHFP-NEXT: sd ra, 1984(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: sd s0, 1976(sp) # 8-byte Folded Spill
+; LP64E-WITHFP-NEXT: .cfi_offset ra, -56
+; LP64E-WITHFP-NEXT: .cfi_offset s0, -64
+; LP64E-WITHFP-NEXT: addi s0, sp, 1992
+; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48
+; LP64E-WITHFP-NEXT: lui a0, 24414
+; LP64E-WITHFP-NEXT: addiw a0, a0, -1704
+; LP64E-WITHFP-NEXT: sub sp, sp, a0
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: addi a0, s0, 12
+; LP64E-WITHFP-NEXT: lui a1, 24414
+; LP64E-WITHFP-NEXT: sub a1, s0, a1
+; LP64E-WITHFP-NEXT: sd a0, -288(a1)
+; LP64E-WITHFP-NEXT: lw a0, 8(s0)
+; LP64E-WITHFP-NEXT: sd a5, 40(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: lui a1, 24414
+; LP64E-WITHFP-NEXT: addiw a1, a1, -1704
+; LP64E-WITHFP-NEXT: add sp, sp, a1
+; LP64E-WITHFP-NEXT: ld ra, 1984(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: ld s0, 1976(sp) # 8-byte Folded Reload
+; LP64E-WITHFP-NEXT: addi sp, sp, 2040
+; LP64E-WITHFP-NEXT: ret
%large = alloca [ 100000000 x i8 ]
%va = alloca ptr
call void @llvm.va_start(ptr %va)
diff --git a/llvm/test/MC/RISCV/option-invalid.s b/llvm/test/MC/RISCV/option-invalid.s
index 683ebc4f5b99239..ee520e08746a39c 100644
--- a/llvm/test/MC/RISCV/option-invalid.s
+++ b/llvm/test/MC/RISCV/option-invalid.s
@@ -56,9 +56,6 @@
# CHECK: :[[#@LINE+1]]:12: warning: unknown option, expected 'push', 'pop', 'rvc', 'norvc', 'arch', 'relax' or 'norelax'
.option bar
-# CHECK: :[[#@LINE+1]]:16: error: unknown extension feature
-.option arch, -i
-
# CHECK: :[[#@LINE+1]]:12: error: .option pop with no .option push
.option pop
diff --git a/llvm/test/MC/RISCV/target-abi-invalid.s b/llvm/test/MC/RISCV/target-abi-invalid.s
index d7dba182fd16695..f78b1481b1e4804 100644
--- a/llvm/test/MC/RISCV/target-abi-invalid.s
+++ b/llvm/test/MC/RISCV/target-abi-invalid.s
@@ -30,7 +30,7 @@
# RUN: | FileCheck -check-prefix=RV32E-LP64 %s
# RUN: llvm-mc -triple=riscv32 -mattr=+e,+f -target-abi lp64f < %s 2>&1 \
# RUN: | FileCheck -check-prefix=RV32EF-LP64F %s
-# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \
+# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \
# RUN: | FileCheck -check-prefix=RV32EFD-LP64D %s
# RUN: llvm-mc -triple=riscv32 -mattr=+e -target-abi lp64e %s 2>&1 \
# RUN: | FileCheck -check-prefix=RV32E-LP64E %s
@@ -42,6 +42,7 @@
# RV32EF-LP64F: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi)
# RV32EFD-LP64D: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi)
# RV32E-LP64E: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi)
+# RV32EFD-LP64D: LLVM ERROR: ILP32E cannot be used with the D ISA extension
# RUN: llvm-mc -triple=riscv32 -target-abi ilp32f < %s 2>&1 \
# RUN: | FileCheck -check-prefix=RV32I-ILP32F %s
@@ -69,15 +70,17 @@
# RUN: | FileCheck -check-prefix=RV32EF-ILP32F %s
# RUN: llvm-mc -triple=riscv32 -mattr=+e,+f -target-abi ilp32f < %s 2>&1 \
# RUN: | FileCheck -check-prefix=RV32EF-ILP32F %s
-# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32f < %s 2>&1 \
+# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32f < %s 2>&1 \
# RUN: | FileCheck -check-prefix=RV32EFD-ILP32F %s
-# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32d < %s 2>&1 \
+# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32d < %s 2>&1 \
# RUN: | FileCheck -check-prefix=RV32EFD-ILP32D %s
# RV32E-ILP32: Only the ilp32e ABI is supported for RV32E (ignoring target-abi)
# RV32EF-ILP32F: Only the ilp32e ABI is supported for RV32E (ignoring target-abi)
# RV32EFD-ILP32F: Only the ilp32e ABI is supported for RV32E (ignoring target-abi)
+# RV32EFD-ILP32F: LLVM ERROR: ILP32E cannot be used with the D ISA extension
# RV32EFD-ILP32D: Only the ilp32e ABI is supported for RV32E (ignoring target-abi)
+# RV32EFD-ILP32D: LLVM ERROR: ILP32E cannot be used with the D ISA extension
# RUN: llvm-mc -triple=riscv64 -mattr=+e -target-abi lp64 < %s 2>&1 \
# RUN: | FileCheck -check-prefix=RV64EF-LP64F %s
More information about the cfe-commits
mailing list