[llvm] 972f297 - Resolve TODO: Use TokenFactor for inline memset (#87002)
via llvm-commits
llvm-commits at lists.llvm.org
Sun May 26 12:08:54 PDT 2024
Author: AtariDreams
Date: 2024-05-26T15:08:51-04:00
New Revision: 972f297f712d822208ceae7546c516cd3696e4b1
URL: https://github.com/llvm/llvm-project/commit/972f297f712d822208ceae7546c516cd3696e4b1
DIFF: https://github.com/llvm/llvm-project/commit/972f297f712d822208ceae7546c516cd3696e4b1.diff
LOG: Resolve TODO: Use TokenFactor for inline memset (#87002)
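The inline memset lowering can join the REP STOS and the trailing-byte memset with a TokenFactor, as the inline memcpy lowering already does, instead of threading them through a single chain.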
Added:
Modified:
llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index 0bff1884933d8..e5f07f230fe6c 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -48,26 +48,25 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const {
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- const X86Subtarget &Subtarget =
- DAG.getMachineFunction().getSubtarget<X86Subtarget>();
+ // If to a segment-relative address space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
-#ifndef NDEBUG
// If the base register might conflict with our physical registers, bail out.
const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
X86::ECX, X86::EAX, X86::EDI};
- assert(!isBaseRegConflictPossible(DAG, ClobberSet));
-#endif
-
- // If to a segment-relative address space, use the default lowering.
- if (DstPtrInfo.getAddrSpace() >= 256)
+ if (isBaseRegConflictPossible(DAG, ClobberSet))
return SDValue();
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ const X86Subtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<X86Subtarget>();
+
// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
if (Alignment < Align(4) || !ConstantSize ||
- ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
+ ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
return SDValue();
uint64_t SizeVal = ConstantSize->getZExtValue();
@@ -128,26 +127,29 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
InGlue = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue };
- Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
-
- if (BytesLeft) {
- // Handle the last 1 - 7 bytes.
- unsigned Offset = SizeVal - BytesLeft;
- EVT AddrVT = Dst.getValueType();
- EVT SizeVT = Size.getValueType();
-
- Chain =
- DAG.getMemset(Chain, dl,
- DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
- DAG.getConstant(Offset, dl, AddrVT)),
- Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
- isVolatile, AlwaysInline,
- /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset));
- }
+ SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
+ SDValue RepStos = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
- // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
- return Chain;
+ /// RepStos can process the whole length.
+ if (BytesLeft == 0)
+ return RepStos;
+
+ // Handle the last 1 - 7 bytes.
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepStos);
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT AddrVT = Dst.getValueType();
+ EVT SizeVT = Size.getValueType();
+
+ Results.push_back(
+ DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, dl, AddrVT)),
+ Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
+ isVolatile, AlwaysInline,
+ /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset)));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
/// Emit a single REP MOVS{B,W,D,Q} instruction.
More information about the llvm-commits
mailing list