[llvm] [RISCV] Add optimization for memset inline (PR #146673)
Boyao Wang via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 19:58:41 PDT 2025
https://github.com/BoyaoWang430 updated https://github.com/llvm/llvm-project/pull/146673
>From e6d4a85acd722aea8fb371d7358dd6ea31ba7840 Mon Sep 17 00:00:00 2001
From: wangboyao <wangboyao at bytedance.com>
Date: Wed, 2 Jul 2025 15:07:44 +0800
Subject: [PATCH 1/3] [RISCV] Add optimization for memset inline
Optimize RISCV memset inline implementation based on the issue discussed in #144562.
---
llvm/include/llvm/CodeGen/TargetLowering.h | 15 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7 +-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 5 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 3 +-
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 +-
llvm/lib/Target/AMDGPU/SIISelLowering.h | 4 +-
llvm/lib/Target/ARM/ARMISelLowering.cpp | 6 +-
llvm/lib/Target/ARM/ARMISelLowering.h | 3 +-
llvm/lib/Target/BPF/BPFISelLowering.h | 4 +-
.../Target/Hexagon/HexagonISelLowering.cpp | 3 +-
llvm/lib/Target/Hexagon/HexagonISelLowering.h | 4 +-
llvm/lib/Target/Mips/MipsISelLowering.cpp | 5 +-
llvm/lib/Target/Mips/MipsISelLowering.h | 3 +-
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 +-
llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 21 +-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 +-
.../Target/SystemZ/SystemZISelLowering.cpp | 12 +-
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 12 +-
llvm/lib/Target/X86/X86ISelLowering.h | 3 +-
llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 5 +-
llvm/test/CodeGen/RISCV/pr135206.ll | 47 +-
llvm/test/CodeGen/RISCV/rvv/memset-inline.ll | 402 ++++++++++++------
llvm/test/CodeGen/RISCV/rvv/pr83017.ll | 10 +-
llvm/test/CodeGen/RISCV/rvv/pr90559.ll | 10 +-
26 files changed, 376 insertions(+), 229 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 84c53e1e45452..2f7f859e8d58d 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2019,9 +2019,9 @@ class LLVM_ABI TargetLoweringBase {
/// a result of memset, memcpy, and memmove lowering.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
- virtual EVT
- getOptimalMemOpType(const MemOp &Op,
- const AttributeList & /*FuncAttributes*/) const {
+ virtual EVT getOptimalMemOpType(const MemOp &Op,
+ const AttributeList & /*FuncAttributes*/,
+ LLVMContext *Context = nullptr) const {
return MVT::Other;
}
@@ -4108,10 +4108,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// Note that this is always the case when Limit is ~0.
/// It returns the types of the sequence of memory ops to perform
/// memset / memcpy by reference.
- virtual bool
- findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
- const MemOp &Op, unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes) const;
+ virtual bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, const MemOp &Op,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const;
/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6df21b624137f..7ca2fa46c5f72 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8334,7 +8334,7 @@ static SDValue getMemcpyLoadsAndStores(
*SrcAlign, isVol, CopyFromConstant);
if (!TLI.findOptimalMemOpLowering(
MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), &C))
return SDValue();
if (DstAlignCanChange) {
@@ -8529,7 +8529,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign,
/*IsVolatile*/ true),
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes()))
+ MF.getFunction().getAttributes(), &C))
return SDValue();
if (DstAlignCanChange) {
@@ -8634,6 +8634,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
+ LLVMContext &C = *DAG.getContext();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
@@ -8646,7 +8647,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
if (!TLI.findOptimalMemOpLowering(
MemOps, Limit,
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
- DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
+ DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes(), &C))
return SDValue();
if (DstAlignCanChange) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 000f8cc6786a5..f25c42e8d8ea1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -211,12 +211,13 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
bool TargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
- unsigned SrcAS, const AttributeList &FuncAttributes) const {
+ unsigned SrcAS, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() < Op.getDstAlign())
return false;
- EVT VT = getOptimalMemOpType(Op, FuncAttributes);
+ EVT VT = getOptimalMemOpType(Op, FuncAttributes, Context);
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fb8bd81c033af..044af4bea3a2c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17587,7 +17587,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
}
EVT AArch64TargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 89f90ee2b7707..47eb60684388b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -233,8 +233,8 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldConsiderGEPOffsetSplit() const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
LLT getOptimalMemOpLLT(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b083a9014737b..15d0d5feae830 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1982,8 +1982,9 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
Alignment, Flags, IsFast);
}
-EVT SITargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+EVT SITargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
// FIXME: Should account for address space here.
// The default fallback uses the private pointer size as a guess for a type to
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c66f300ec4cb1..2ebcf716afcc9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -357,8 +357,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *IsFast = nullptr) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c357756a25878..ee699af37ef11 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -19215,9 +19215,9 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
return false;
}
-
-EVT ARMTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+EVT ARMTargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
// See if we can use NEON instructions for this...
if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 604910e04d4cc..e7bee969a9e29 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -473,7 +473,8 @@ class VectorType;
unsigned *Fast) const override;
EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 23cbce7094e6b..f400a271786ff 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -114,8 +114,8 @@ class BPFTargetLowering : public TargetLowering {
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override {
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override {
return Op.size() >= 8 ? MVT::i64 : MVT::i32;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index e7d0ec6ee0fe5..4543474ef114c 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3742,7 +3742,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
/// does not need to be loaded. It returns EVT::Other if the type should be
/// determined using generic target-independent logic.
EVT HexagonTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
if (Op.size() >= 8 && Op.isAligned(Align(8)))
return MVT::i64;
if (Op.size() >= 4 && Op.isAligned(Align(4)))
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index a2c9b57d04caa..e5b477d5fd57a 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -325,8 +325,8 @@ class HexagonTargetLowering : public TargetLowering {
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace, Align Alignment,
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 9b4b336727235..8fa5a6ef4c589 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -4519,8 +4519,9 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
-EVT MipsTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+EVT MipsTargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
if (Subtarget.hasMips64())
return MVT::i64;
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 241e9343ae384..2580894b15bb5 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -699,7 +699,8 @@ class TargetRegisterClass;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5a4a63469ad6e..fbfc825b2823c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18190,8 +18190,9 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
-EVT PPCTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+EVT PPCTargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4c88bd372b106..13876fcc57783 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1089,7 +1089,8 @@ namespace llvm {
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 326dd7149ef96..126e1e2c8de24 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1662,7 +1662,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
- MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
+ MaxStoresPerMemset = Subtarget.hasVInstructions()
+ ? Subtarget.getRealMinVLen() / 8
+ : Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
@@ -23749,9 +23751,9 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
return Subtarget.enableUnalignedVectorMem();
}
-
-EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const {
+EVT RISCVTargetLowering::getOptimalMemOpType(
+ const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
if (!Subtarget.hasVInstructions())
return MVT::Other;
@@ -23780,6 +23782,17 @@ EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
return MVT::Other;
+ // If the memset is larger than one LMUL8 memory operation can cover, inlining
+ // it is not supported. Otherwise, return an EVT sized to the whole operation
+ // so that it is not needlessly split into pieces and merged back together.
+ if (Op.isMemset()) {
+ if (Op.size() > 8 * MinVLenInBytes)
+ return MVT::Other;
+ if (Op.size() % 8 == 0)
+ return EVT::getVectorVT(*Context, MVT::i64, Op.size() / 8);
+ return EVT::getVectorVT(*Context, MVT::i8, Op.size());
+ }
+
// Prefer i8 for non-zero memset as it allows us to avoid materializing
// a large scalar constant and instead use vmv.v.x/i to do the
// broadcast. For everything else, prefer ELenVT to minimize VL and thus
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index f67d7f155c9d0..ae2360d6e85eb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -331,8 +331,8 @@ class RISCVTargetLowering : public TargetLowering {
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *Fast = nullptr) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
bool splitValueIntoRegisterParts(
SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index c6044514fa951..6dd29d3a2ef51 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1424,7 +1424,8 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
bool SystemZTargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
- unsigned SrcAS, const AttributeList &FuncAttributes) const {
+ unsigned SrcAS, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
const int MVCFastLen = 16;
if (Limit != ~unsigned(0)) {
@@ -1437,12 +1438,13 @@ bool SystemZTargetLowering::findOptimalMemOpLowering(
return false; // Memset zero: Use XC
}
- return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
- SrcAS, FuncAttributes);
+ return TargetLowering::findOptimalMemOpLowering(
+ MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes, Context);
}
-EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const {
+EVT SystemZTargetLowering::getOptimalMemOpType(
+ const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index f2f0bf6d8b410..98e7c891745d8 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -509,12 +509,12 @@ class SystemZTargetLowering : public TargetLowering {
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,
unsigned *Fast) const override;
- bool
- findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
- const MemOp &Op, unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
+ const MemOp &Op, unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context) const override;
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
bool isTruncateFree(Type *, Type *) const override;
bool isTruncateFree(EVT, EVT) const override;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5cb6b3e493a32..dca3096507212 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1096,7 +1096,8 @@ namespace llvm {
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;
EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
/// Returns true if it's safe to use load / store of the
/// specified type to expand memcpy / memset inline. This is mostly true
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index cb38a39ff991d..44f1e9f1ca52b 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -286,8 +286,9 @@ Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
/// target-independent logic.
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
-EVT X86TargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+EVT X86TargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
if (Op.size() >= 16 &&
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
diff --git a/llvm/test/CodeGen/RISCV/pr135206.ll b/llvm/test/CodeGen/RISCV/pr135206.ll
index 75b11c373895b..12c0dc3759bbf 100644
--- a/llvm/test/CodeGen/RISCV/pr135206.ll
+++ b/llvm/test/CodeGen/RISCV/pr135206.ll
@@ -12,9 +12,6 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
; CHECK-NEXT: addi sp, sp, -2032
; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 2000(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s3, 1992(sp) # 8-byte Folded Spill
; CHECK-NEXT: lui a0, 7
; CHECK-NEXT: sub t1, sp, a0
; CHECK-NEXT: lui t2, 1
@@ -24,8 +21,9 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
; CHECK-NEXT: bne sp, t1, .LBB0_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: addi sp, sp, -2048
-; CHECK-NEXT: addi sp, sp, -96
+; CHECK-NEXT: addi sp, sp, -48
; CHECK-NEXT: csrr t1, vlenb
+; CHECK-NEXT: slli t1, t1, 2
; CHECK-NEXT: lui t2, 1
; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, t2
@@ -34,45 +32,34 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
; CHECK-NEXT: bge t1, t2, .LBB0_3
; CHECK-NEXT: # %bb.4:
; CHECK-NEXT: sub sp, sp, t1
-; CHECK-NEXT: li a0, 86
-; CHECK-NEXT: addi s0, sp, 48
-; CHECK-NEXT: addi s1, sp, 32
-; CHECK-NEXT: addi s2, sp, 16
-; CHECK-NEXT: lui a1, 353637
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: lui a0, 353637
+; CHECK-NEXT: addi a0, a0, 1622
+; CHECK-NEXT: slli a1, a0, 32
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetivli zero, 7, e64, m4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-NEXT: addi a0, a1, 1622
-; CHECK-NEXT: vse8.v v8, (s0)
-; CHECK-NEXT: vse8.v v8, (s1)
-; CHECK-NEXT: vse8.v v8, (s2)
-; CHECK-NEXT: slli a1, a0, 32
-; CHECK-NEXT: add s3, a0, a1
-; CHECK-NEXT: sd s3, 64(sp)
+; CHECK-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
+; CHECK-NEXT: addi s0, sp, 16
+; CHECK-NEXT: vse64.v v8, (s0)
; CHECK-NEXT: call bar
; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vse8.v v8, (s0)
-; CHECK-NEXT: vse8.v v8, (s1)
-; CHECK-NEXT: vse8.v v8, (s2)
-; CHECK-NEXT: sd s3, 64(sp)
+; CHECK-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
+; CHECK-NEXT: vsetivli zero, 7, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v8, (s0)
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: add sp, sp, a1
; CHECK-NEXT: lui a1, 8
-; CHECK-NEXT: addi a1, a1, -1952
+; CHECK-NEXT: addi a1, a1, -2000
; CHECK-NEXT: add sp, sp, a1
; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 2000(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s3, 1992(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 2032
; CHECK-NEXT: ret
%1 = alloca %"buff", align 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
index 896394017b6f1..9ad561d66a662 100644
--- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
@@ -149,71 +149,197 @@ define void @memset_8(ptr %a, i8 %value) nounwind {
}
define void @memset_16(ptr %a, i8 %value) nounwind {
-; RV32-BOTH-LABEL: memset_16:
-; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BOTH-NEXT: vmv.v.x v8, a1
-; RV32-BOTH-NEXT: vse8.v v8, (a0)
-; RV32-BOTH-NEXT: ret
+; RV32-LABEL: memset_16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: zext.b a1, a1
+; RV32-NEXT: lui a2, 4112
+; RV32-NEXT: addi a2, a2, 257
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a1), zero
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v8, (a0)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
;
-; RV64-BOTH-LABEL: memset_16:
-; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BOTH-NEXT: vmv.v.x v8, a1
-; RV64-BOTH-NEXT: vse8.v v8, (a0)
-; RV64-BOTH-NEXT: ret
+; RV64-LABEL: memset_16:
+; RV64: # %bb.0:
+; RV64-NEXT: zext.b a1, a1
+; RV64-NEXT: lui a2, 4112
+; RV64-NEXT: addi a2, a2, 257
+; RV64-NEXT: slli a3, a2, 32
+; RV64-NEXT: add a2, a2, a3
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vse8.v v8, (a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_16:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: addi sp, sp, -16
+; RV32-FAST-NEXT: zext.b a1, a1
+; RV32-FAST-NEXT: lui a2, 4112
+; RV32-FAST-NEXT: addi a2, a2, 257
+; RV32-FAST-NEXT: mul a1, a1, a2
+; RV32-FAST-NEXT: sw a1, 8(sp)
+; RV32-FAST-NEXT: sw a1, 12(sp)
+; RV32-FAST-NEXT: addi a1, sp, 8
+; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-FAST-NEXT: vlse64.v v8, (a1), zero
+; RV32-FAST-NEXT: vse64.v v8, (a0)
+; RV32-FAST-NEXT: addi sp, sp, 16
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_16:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: zext.b a1, a1
+; RV64-FAST-NEXT: lui a2, 4112
+; RV64-FAST-NEXT: addi a2, a2, 257
+; RV64-FAST-NEXT: slli a3, a2, 32
+; RV64-FAST-NEXT: add a2, a2, a3
+; RV64-FAST-NEXT: mul a1, a1, a2
+; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-FAST-NEXT: vmv.v.x v8, a1
+; RV64-FAST-NEXT: vse64.v v8, (a0)
+; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 16, i1 0)
ret void
}
define void @memset_32(ptr %a, i8 %value) nounwind {
-; RV32-BOTH-LABEL: memset_32:
-; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: addi a2, a0, 16
-; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BOTH-NEXT: vmv.v.x v8, a1
-; RV32-BOTH-NEXT: vse8.v v8, (a2)
-; RV32-BOTH-NEXT: vse8.v v8, (a0)
-; RV32-BOTH-NEXT: ret
+; RV32-LABEL: memset_32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: zext.b a1, a1
+; RV32-NEXT: lui a2, 4112
+; RV32-NEXT: addi a2, a2, 257
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v8, (a1), zero
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT: vse8.v v8, (a0)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
;
-; RV64-BOTH-LABEL: memset_32:
-; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: addi a2, a0, 16
-; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BOTH-NEXT: vmv.v.x v8, a1
-; RV64-BOTH-NEXT: vse8.v v8, (a2)
-; RV64-BOTH-NEXT: vse8.v v8, (a0)
-; RV64-BOTH-NEXT: ret
+; RV64-LABEL: memset_32:
+; RV64: # %bb.0:
+; RV64-NEXT: zext.b a1, a1
+; RV64-NEXT: lui a2, 4112
+; RV64-NEXT: addi a2, a2, 257
+; RV64-NEXT: slli a3, a2, 32
+; RV64-NEXT: add a2, a2, a3
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT: vse8.v v8, (a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_32:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: addi sp, sp, -16
+; RV32-FAST-NEXT: zext.b a1, a1
+; RV32-FAST-NEXT: lui a2, 4112
+; RV32-FAST-NEXT: addi a2, a2, 257
+; RV32-FAST-NEXT: mul a1, a1, a2
+; RV32-FAST-NEXT: sw a1, 8(sp)
+; RV32-FAST-NEXT: sw a1, 12(sp)
+; RV32-FAST-NEXT: addi a1, sp, 8
+; RV32-FAST-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-FAST-NEXT: vlse64.v v8, (a1), zero
+; RV32-FAST-NEXT: vse64.v v8, (a0)
+; RV32-FAST-NEXT: addi sp, sp, 16
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_32:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: zext.b a1, a1
+; RV64-FAST-NEXT: lui a2, 4112
+; RV64-FAST-NEXT: addi a2, a2, 257
+; RV64-FAST-NEXT: slli a3, a2, 32
+; RV64-FAST-NEXT: add a2, a2, a3
+; RV64-FAST-NEXT: mul a1, a1, a2
+; RV64-FAST-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-FAST-NEXT: vmv.v.x v8, a1
+; RV64-FAST-NEXT: vse64.v v8, (a0)
+; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0)
ret void
}
define void @memset_64(ptr %a, i8 %value) nounwind {
-; RV32-BOTH-LABEL: memset_64:
-; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: addi a2, a0, 48
-; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BOTH-NEXT: vmv.v.x v8, a1
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse8.v v8, (a2)
-; RV32-BOTH-NEXT: addi a2, a0, 16
-; RV32-BOTH-NEXT: vse8.v v8, (a1)
-; RV32-BOTH-NEXT: vse8.v v8, (a2)
-; RV32-BOTH-NEXT: vse8.v v8, (a0)
-; RV32-BOTH-NEXT: ret
+; RV32-LABEL: memset_64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: zext.b a1, a1
+; RV32-NEXT: lui a2, 4112
+; RV32-NEXT: addi a2, a2, 257
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v8, (a1), zero
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT: vse8.v v8, (a0)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
;
-; RV64-BOTH-LABEL: memset_64:
-; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: addi a2, a0, 48
-; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-BOTH-NEXT: vmv.v.x v8, a1
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse8.v v8, (a2)
-; RV64-BOTH-NEXT: addi a2, a0, 16
-; RV64-BOTH-NEXT: vse8.v v8, (a1)
-; RV64-BOTH-NEXT: vse8.v v8, (a2)
-; RV64-BOTH-NEXT: vse8.v v8, (a0)
-; RV64-BOTH-NEXT: ret
+; RV64-LABEL: memset_64:
+; RV64: # %bb.0:
+; RV64-NEXT: zext.b a1, a1
+; RV64-NEXT: lui a2, 4112
+; RV64-NEXT: addi a2, a2, 257
+; RV64-NEXT: slli a3, a2, 32
+; RV64-NEXT: add a2, a2, a3
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT: vse8.v v8, (a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_64:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: addi sp, sp, -16
+; RV32-FAST-NEXT: zext.b a1, a1
+; RV32-FAST-NEXT: lui a2, 4112
+; RV32-FAST-NEXT: addi a2, a2, 257
+; RV32-FAST-NEXT: mul a1, a1, a2
+; RV32-FAST-NEXT: sw a1, 8(sp)
+; RV32-FAST-NEXT: sw a1, 12(sp)
+; RV32-FAST-NEXT: addi a1, sp, 8
+; RV32-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-FAST-NEXT: vlse64.v v8, (a1), zero
+; RV32-FAST-NEXT: vse64.v v8, (a0)
+; RV32-FAST-NEXT: addi sp, sp, 16
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_64:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: zext.b a1, a1
+; RV64-FAST-NEXT: lui a2, 4112
+; RV64-FAST-NEXT: addi a2, a2, 257
+; RV64-FAST-NEXT: slli a3, a2, 32
+; RV64-FAST-NEXT: add a2, a2, a3
+; RV64-FAST-NEXT: mul a1, a1, a2
+; RV64-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-FAST-NEXT: vmv.v.x v8, a1
+; RV64-FAST-NEXT: vse64.v v8, (a0)
+; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 64, i1 0)
ret void
}
@@ -291,16 +417,31 @@ define void @aligned_memset_8(ptr align 8 %a, i8 %value) nounwind {
define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
; RV32-BOTH-LABEL: aligned_memset_16:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BOTH-NEXT: vmv.v.x v8, a1
-; RV32-BOTH-NEXT: vse8.v v8, (a0)
+; RV32-BOTH-NEXT: addi sp, sp, -16
+; RV32-BOTH-NEXT: zext.b a1, a1
+; RV32-BOTH-NEXT: lui a2, 4112
+; RV32-BOTH-NEXT: addi a2, a2, 257
+; RV32-BOTH-NEXT: mul a1, a1, a2
+; RV32-BOTH-NEXT: sw a1, 8(sp)
+; RV32-BOTH-NEXT: sw a1, 12(sp)
+; RV32-BOTH-NEXT: addi a1, sp, 8
+; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT: vlse64.v v8, (a1), zero
+; RV32-BOTH-NEXT: vse64.v v8, (a0)
+; RV32-BOTH-NEXT: addi sp, sp, 16
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_memset_16:
; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BOTH-NEXT: zext.b a1, a1
+; RV64-BOTH-NEXT: lui a2, 4112
+; RV64-BOTH-NEXT: addi a2, a2, 257
+; RV64-BOTH-NEXT: slli a3, a2, 32
+; RV64-BOTH-NEXT: add a2, a2, a3
+; RV64-BOTH-NEXT: mul a1, a1, a2
+; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-BOTH-NEXT: vmv.v.x v8, a1
-; RV64-BOTH-NEXT: vse8.v v8, (a0)
+; RV64-BOTH-NEXT: vse64.v v8, (a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 %value, i64 16, i1 0)
ret void
@@ -309,20 +450,31 @@ define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
; RV32-BOTH-LABEL: aligned_memset_32:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: addi a2, a0, 16
-; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BOTH-NEXT: vmv.v.x v8, a1
-; RV32-BOTH-NEXT: vse8.v v8, (a2)
-; RV32-BOTH-NEXT: vse8.v v8, (a0)
+; RV32-BOTH-NEXT: addi sp, sp, -16
+; RV32-BOTH-NEXT: zext.b a1, a1
+; RV32-BOTH-NEXT: lui a2, 4112
+; RV32-BOTH-NEXT: addi a2, a2, 257
+; RV32-BOTH-NEXT: mul a1, a1, a2
+; RV32-BOTH-NEXT: sw a1, 8(sp)
+; RV32-BOTH-NEXT: sw a1, 12(sp)
+; RV32-BOTH-NEXT: addi a1, sp, 8
+; RV32-BOTH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-BOTH-NEXT: vlse64.v v8, (a1), zero
+; RV32-BOTH-NEXT: vse64.v v8, (a0)
+; RV32-BOTH-NEXT: addi sp, sp, 16
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_memset_32:
; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: addi a2, a0, 16
-; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BOTH-NEXT: zext.b a1, a1
+; RV64-BOTH-NEXT: lui a2, 4112
+; RV64-BOTH-NEXT: addi a2, a2, 257
+; RV64-BOTH-NEXT: slli a3, a2, 32
+; RV64-BOTH-NEXT: add a2, a2, a3
+; RV64-BOTH-NEXT: mul a1, a1, a2
+; RV64-BOTH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-BOTH-NEXT: vmv.v.x v8, a1
-; RV64-BOTH-NEXT: vse8.v v8, (a2)
-; RV64-BOTH-NEXT: vse8.v v8, (a0)
+; RV64-BOTH-NEXT: vse64.v v8, (a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0)
ret void
@@ -331,28 +483,31 @@ define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
; RV32-BOTH-LABEL: aligned_memset_64:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: addi a2, a0, 48
-; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-BOTH-NEXT: vmv.v.x v8, a1
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse8.v v8, (a2)
-; RV32-BOTH-NEXT: addi a2, a0, 16
-; RV32-BOTH-NEXT: vse8.v v8, (a1)
-; RV32-BOTH-NEXT: vse8.v v8, (a2)
-; RV32-BOTH-NEXT: vse8.v v8, (a0)
+; RV32-BOTH-NEXT: addi sp, sp, -16
+; RV32-BOTH-NEXT: zext.b a1, a1
+; RV32-BOTH-NEXT: lui a2, 4112
+; RV32-BOTH-NEXT: addi a2, a2, 257
+; RV32-BOTH-NEXT: mul a1, a1, a2
+; RV32-BOTH-NEXT: sw a1, 8(sp)
+; RV32-BOTH-NEXT: sw a1, 12(sp)
+; RV32-BOTH-NEXT: addi a1, sp, 8
+; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-BOTH-NEXT: vlse64.v v8, (a1), zero
+; RV32-BOTH-NEXT: vse64.v v8, (a0)
+; RV32-BOTH-NEXT: addi sp, sp, 16
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_memset_64:
; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: addi a2, a0, 48
-; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BOTH-NEXT: zext.b a1, a1
+; RV64-BOTH-NEXT: lui a2, 4112
+; RV64-BOTH-NEXT: addi a2, a2, 257
+; RV64-BOTH-NEXT: slli a3, a2, 32
+; RV64-BOTH-NEXT: add a2, a2, a3
+; RV64-BOTH-NEXT: mul a1, a1, a2
+; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-BOTH-NEXT: vmv.v.x v8, a1
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse8.v v8, (a2)
-; RV64-BOTH-NEXT: addi a2, a0, 16
-; RV64-BOTH-NEXT: vse8.v v8, (a1)
-; RV64-BOTH-NEXT: vse8.v v8, (a2)
-; RV64-BOTH-NEXT: vse8.v v8, (a0)
+; RV64-BOTH-NEXT: vse64.v v8, (a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 %value, i64 64, i1 0)
ret void
@@ -472,15 +627,17 @@ define void @bzero_8(ptr %a) nounwind {
define void @bzero_16(ptr %a) nounwind {
; RV32-LABEL: bzero_16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT: vse8.v v8, (a0)
; RV64-NEXT: ret
;
@@ -504,38 +661,34 @@ define void @bzero_16(ptr %a) nounwind {
define void @bzero_32(ptr %a) nounwind {
; RV32-LABEL: bzero_32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT: vse8.v v8, (a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_32:
; RV32-FAST: # %bb.0:
-; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-FAST-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-FAST-NEXT: vmv.v.i v8, 0
; RV32-FAST-NEXT: vse64.v v8, (a0)
-; RV32-FAST-NEXT: addi a0, a0, 16
-; RV32-FAST-NEXT: vse64.v v8, (a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: bzero_32:
; RV64-FAST: # %bb.0:
-; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-FAST-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-FAST-NEXT: vmv.v.i v8, 0
; RV64-FAST-NEXT: vse64.v v8, (a0)
-; RV64-FAST-NEXT: addi a0, a0, 16
-; RV64-FAST-NEXT: vse64.v v8, (a0)
; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0)
ret void
@@ -544,17 +697,19 @@ define void @bzero_32(ptr %a) nounwind {
define void @bzero_64(ptr %a) nounwind {
; RV32-LABEL: bzero_64:
; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: li a1, 64
; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_64:
; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: li a1, 64
; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vse8.v v8, (a0)
; RV64-NEXT: ret
;
@@ -642,20 +797,16 @@ define void @aligned_bzero_16(ptr %a) nounwind {
define void @aligned_bzero_32(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_32:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-BOTH-NEXT: vmv.v.i v8, 0
; RV32-BOTH-NEXT: vse64.v v8, (a0)
-; RV32-BOTH-NEXT: addi a0, a0, 16
-; RV32-BOTH-NEXT: vse64.v v8, (a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_32:
; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-BOTH-NEXT: vmv.v.i v8, 0
; RV64-BOTH-NEXT: vse64.v v8, (a0)
-; RV64-BOTH-NEXT: addi a0, a0, 16
-; RV64-BOTH-NEXT: vse64.v v8, (a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0)
ret void
@@ -682,18 +833,22 @@ define void @aligned_bzero_64(ptr %a) nounwind {
define void @aligned_bzero_66(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_66:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: sh zero, 64(a0)
-; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-BOTH-NEXT: li a1, 128
+; RV32-BOTH-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: vse64.v v8, (a0)
+; RV32-BOTH-NEXT: li a1, 66
+; RV32-BOTH-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV32-BOTH-NEXT: vse8.v v8, (a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_66:
; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: sh zero, 64(a0)
-; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BOTH-NEXT: li a1, 128
+; RV64-BOTH-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: vse64.v v8, (a0)
+; RV64-BOTH-NEXT: li a1, 66
+; RV64-BOTH-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; RV64-BOTH-NEXT: vse8.v v8, (a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 66, i1 0)
ret void
@@ -702,27 +857,15 @@ define void @aligned_bzero_66(ptr %a) nounwind {
define void @aligned_bzero_96(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_96:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: addi a1, a0, 80
-; RV32-BOTH-NEXT: vse64.v v8, (a0)
-; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT: vsetivli zero, 12, e64, m8, ta, ma
; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: addi a0, a0, 64
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: vse64.v v8, (a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_96:
; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BOTH-NEXT: vsetivli zero, 12, e64, m8, ta, ma
; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: addi a1, a0, 80
-; RV64-BOTH-NEXT: vse64.v v8, (a0)
-; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: addi a0, a0, 64
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: vse64.v v8, (a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 96, i1 0)
@@ -750,11 +893,12 @@ define void @aligned_bzero_128(ptr %a) nounwind {
define void @aligned_bzero_256(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_256:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-BOTH-NEXT: addi a1, a0, 128
+; RV32-BOTH-NEXT: li a2, 32
+; RV32-BOTH-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: vse64.v v8, (a0)
-; RV32-BOTH-NEXT: addi a0, a0, 128
-; RV32-BOTH-NEXT: vse64.v v8, (a0)
+; RV32-BOTH-NEXT: vse32.v v8, (a1)
+; RV32-BOTH-NEXT: vse32.v v8, (a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_256:
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr83017.ll b/llvm/test/CodeGen/RISCV/rvv/pr83017.ll
index beca480378a35..7a450397a30ee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr83017.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr83017.ll
@@ -31,15 +31,9 @@ define void @aliasing(ptr %p) {
; CHECK-LABEL: aliasing:
; CHECK: # %bb.0:
; CHECK-NEXT: lw a1, 84(a0)
-; CHECK-NEXT: addi a2, a0, 80
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 12, e64, m8, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vs1r.v v8, (a2)
-; CHECK-NEXT: addi a2, a0, 64
-; CHECK-NEXT: vs1r.v v8, (a2)
-; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vs4r.v v8, (a0)
+; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: sw a1, 84(a0)
; CHECK-NEXT: ret
%q = getelementptr inbounds i8, ptr %p, i64 84
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr90559.ll b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
index 7e109f307c4a5..97a3e6f2f6f58 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
@@ -28,15 +28,9 @@ define void @f(ptr %p) vscale_range(2,2) {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
; CHECK-NEXT: lw a1, 84(a0)
-; CHECK-NEXT: addi a2, a0, 80
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 12, e64, m8, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vs1r.v v8, (a2)
-; CHECK-NEXT: addi a2, a0, 64
-; CHECK-NEXT: vs1r.v v8, (a2)
-; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vs4r.v v8, (a0)
+; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: sw a1, 84(a0)
; CHECK-NEXT: ret
%q = getelementptr inbounds i8, ptr %p, i64 84
>From d9e63ec68998615c29aeba33c880b346f176a0c1 Mon Sep 17 00:00:00 2001
From: wangboyao <wangboyao at bytedance.com>
Date: Thu, 3 Jul 2025 10:23:58 +0800
Subject: [PATCH 2/3] Add llvm_unreachable to ensure LLVMContext is not null
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +++
1 file changed, 3 insertions(+)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 126e1e2c8de24..c818b1c961c54 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -23754,6 +23754,9 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT RISCVTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes,
LLVMContext *Context) const {
+ if (!Context)
+ llvm_unreachable("LLVMContext must not be null here");
+
if (!Subtarget.hasVInstructions())
return MVT::Other;
>From 8fc4864c63677e5ae2117a36600c75cc12cb4549 Mon Sep 17 00:00:00 2001
From: wangboyao <wangboyao at bytedance.com>
Date: Sat, 5 Jul 2025 10:57:52 +0800
Subject: [PATCH 3/3] Change the way to pass LLVMContext
---
llvm/include/llvm/CodeGen/TargetLowering.h | 16 ++++++++--------
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 12 ++++++------
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 8 ++++----
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ++--
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 ++--
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +++---
llvm/lib/Target/AMDGPU/SIISelLowering.h | 4 ++--
llvm/lib/Target/ARM/ARMISelLowering.cpp | 6 +++---
llvm/lib/Target/ARM/ARMISelLowering.h | 5 ++---
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 6 +++---
llvm/lib/Target/BPF/BPFISelLowering.h | 4 ++--
llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 4 ++--
llvm/lib/Target/Hexagon/HexagonISelLowering.h | 4 ++--
llvm/lib/Target/Mips/MipsISelLowering.cpp | 6 +++---
llvm/lib/Target/Mips/MipsISelLowering.h | 5 ++---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 +++---
llvm/lib/Target/PowerPC/PPCISelLowering.h | 5 ++---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 11 ++++-------
llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 ++--
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 14 +++++++-------
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 13 +++++++------
llvm/lib/Target/X86/X86ISelLowering.h | 5 ++---
llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 6 +++---
23 files changed, 76 insertions(+), 82 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 2f7f859e8d58d..be9259045ee4d 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2019,9 +2019,9 @@ class LLVM_ABI TargetLoweringBase {
/// a result of memset, memcpy, and memmove lowering.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
- virtual EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList & /*FuncAttributes*/,
- LLVMContext *Context = nullptr) const {
+ virtual EVT
+ getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList & /*FuncAttributes*/) const {
return MVT::Other;
}
@@ -4108,11 +4108,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// Note that this is always the case when Limit is ~0.
/// It returns the types of the sequence of memory ops to perform
/// memset / memcpy by reference.
- virtual bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
- unsigned Limit, const MemOp &Op,
- unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const;
+ virtual bool
+ findOptimalMemOpLowering(LLVMContext &Context, std::vector<EVT> &MemOps,
+ unsigned Limit, const MemOp &Op, unsigned DstAS,
+ unsigned SrcAS,
+ const AttributeList &FuncAttributes) const;
/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7ca2fa46c5f72..5832802cad5ae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8333,8 +8333,8 @@ static SDValue getMemcpyLoadsAndStores(
: MemOp::Copy(Size, DstAlignCanChange, Alignment,
*SrcAlign, isVol, CopyFromConstant);
if (!TLI.findOptimalMemOpLowering(
- MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), &C))
+ C, MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
@@ -8525,11 +8525,11 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
assert(SrcAlign && "SrcAlign must be set");
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!TLI.findOptimalMemOpLowering(
- MemOps, Limit,
+ C, MemOps, Limit,
MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign,
/*IsVolatile*/ true),
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), &C))
+ MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
@@ -8645,9 +8645,9 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize);
if (!TLI.findOptimalMemOpLowering(
- MemOps, Limit,
+ C, MemOps, Limit,
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
- DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes(), &C))
+ DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f25c42e8d8ea1..545a3b17d1856 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -210,14 +210,14 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
}
bool TargetLowering::findOptimalMemOpLowering(
- std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
- unsigned SrcAS, const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
+ LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
+ const MemOp &Op, unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes) const {
if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() < Op.getDstAlign())
return false;
- EVT VT = getOptimalMemOpType(Op, FuncAttributes, Context);
+ EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 044af4bea3a2c..e245aa6acdbe6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17587,8 +17587,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
}
EVT AArch64TargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
+ LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 47eb60684388b..65fe08e92c235 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -233,8 +233,8 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldConsiderGEPOffsetSplit() const override;
- EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const override;
+ EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
LLT getOptimalMemOpLLT(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 15d0d5feae830..404f8ede41e12 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1982,9 +1982,9 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
Alignment, Flags, IsFast);
}
-EVT SITargetLowering::getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
+EVT SITargetLowering::getOptimalMemOpType(
+ LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
// FIXME: Should account for address space here.
// The default fallback uses the private pointer size as a guess for a type to
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 2ebcf716afcc9..acf6158572a4d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -357,8 +357,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *IsFast = nullptr) const override;
- EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const override;
+ EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a85bf405556b1..264eca83fb12e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -19219,9 +19219,9 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
return false;
}
-EVT ARMTargetLowering::getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
+EVT ARMTargetLowering::getOptimalMemOpType(
+ LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
// See if we can use NEON instructions for this...
if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index e7bee969a9e29..5f4aef55b22c9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -472,9 +472,8 @@ class VectorType;
MachineMemOperand::Flags Flags,
unsigned *Fast) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const override;
+ EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 203fb76d7be86..05d4069a686ab 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1213,9 +1213,9 @@ int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const {
// loaded and stored. That's why we multiply the number of elements by 2 to
// get the cost for this memcpy.
std::vector<EVT> MemOps;
- if (getTLI()->findOptimalMemOpLowering(
- MemOps, Limit, MOp, DstAddrSpace,
- SrcAddrSpace, F->getAttributes()))
+ LLVMContext &C = F->getContext();
+ if (getTLI()->findOptimalMemOpLowering(C, MemOps, Limit, MOp, DstAddrSpace,
+ SrcAddrSpace, F->getAttributes()))
return MemOps.size() * Factor;
// If we can't find an optimal memop lowering, return the default cost
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index f400a271786ff..8f60261c10e9e 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -114,8 +114,8 @@ class BPFTargetLowering : public TargetLowering {
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
- EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const override {
+ EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const override {
return Op.size() >= 8 ? MVT::i64 : MVT::i32;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 4543474ef114c..f05ff881edf2f 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3742,8 +3742,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
/// does not need to be loaded. It returns EVT::Other if the type should be
/// determined using generic target-independent logic.
EVT HexagonTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
+ LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
if (Op.size() >= 8 && Op.isAligned(Align(8)))
return MVT::i64;
if (Op.size() >= 4 && Op.isAligned(Align(4)))
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index e5b477d5fd57a..5f528fb6c9cc0 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -325,8 +325,8 @@ class HexagonTargetLowering : public TargetLowering {
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
- EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const override;
+ EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace, Align Alignment,
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 8fa5a6ef4c589..c2f62cd88abc0 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -4519,9 +4519,9 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
-EVT MipsTargetLowering::getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
+EVT MipsTargetLowering::getOptimalMemOpType(
+ LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
if (Subtarget.hasMips64())
return MVT::i64;
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 2580894b15bb5..31ac5d4c185bc 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -698,9 +698,8 @@ class TargetRegisterClass;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const override;
+ EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index fbfc825b2823c..90b736805f97d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18190,9 +18190,9 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
-EVT PPCTargetLowering::getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
+EVT PPCTargetLowering::getOptimalMemOpType(
+ LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 13876fcc57783..124c7116dc3b5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1088,9 +1088,8 @@ namespace llvm {
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const override;
+ EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b01af803d0596..042104578f0e0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -23779,11 +23779,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
}
EVT RISCVTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
- if (!Context)
- llvm_unreachable("LLVMContext must not be null here");
-
+ LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
if (!Subtarget.hasVInstructions())
return MVT::Other;
@@ -23819,8 +23816,8 @@ EVT RISCVTargetLowering::getOptimalMemOpType(
if (Op.size() > 8 * MinVLenInBytes)
return MVT::Other;
if (Op.size() % 8 == 0)
- return EVT::getVectorVT(*Context, MVT::i64, Op.size() / 8);
- return EVT::getVectorVT(*Context, MVT::i8, Op.size());
+ return EVT::getVectorVT(Context, MVT::i64, Op.size() / 8);
+ return EVT::getVectorVT(Context, MVT::i8, Op.size());
}
// Prefer i8 for non-zero memset as it allows us to avoid materializing
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ae2360d6e85eb..bcbda30342b80 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -331,8 +331,8 @@ class RISCVTargetLowering : public TargetLowering {
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *Fast = nullptr) const override;
- EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const override;
+ EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
bool splitValueIntoRegisterParts(
SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 6dd29d3a2ef51..dbb406d2bb55f 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1423,9 +1423,9 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
bool SystemZTargetLowering::findOptimalMemOpLowering(
- std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
- unsigned SrcAS, const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
+ LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
+ const MemOp &Op, unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes) const {
const int MVCFastLen = 16;
if (Limit != ~unsigned(0)) {
@@ -1438,13 +1438,13 @@ bool SystemZTargetLowering::findOptimalMemOpLowering(
return false; // Memset zero: Use XC
}
- return TargetLowering::findOptimalMemOpLowering(
- MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes, Context);
+ return TargetLowering::findOptimalMemOpLowering(Context, MemOps, Limit, Op,
+ DstAS, SrcAS, FuncAttributes);
}
EVT SystemZTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
+ LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 98e7c891745d8..1866962e17587 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -509,12 +509,13 @@ class SystemZTargetLowering : public TargetLowering {
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,
unsigned *Fast) const override;
- bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
- const MemOp &Op, unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes,
- LLVMContext *Context) const override;
- EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const override;
+ bool
+ findOptimalMemOpLowering(LLVMContext &Context, std::vector<EVT> &MemOps,
+ unsigned Limit, const MemOp &Op, unsigned DstAS,
+ unsigned SrcAS,
+ const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
bool isTruncateFree(Type *, Type *) const override;
bool isTruncateFree(EVT, EVT) const override;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index dca3096507212..3039b7eeb38ff 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1095,9 +1095,8 @@ namespace llvm {
/// 4-byte boundaries.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes,
- LLVMContext *Context = nullptr) const override;
+ EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
/// Returns true if it's safe to use load / store of the
/// specified type to expand memcpy / memset inline. This is mostly true
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 44f1e9f1ca52b..9ad355311527b 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -286,9 +286,9 @@ Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
/// target-independent logic.
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
-EVT X86TargetLowering::getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes,
- LLVMContext *Context) const {
+EVT X86TargetLowering::getOptimalMemOpType(
+ LLVMContext &Context, const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
if (Op.size() >= 16 &&
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
More information about the llvm-commits
mailing list