[llvm] [NFC] Convert LoadExtActions to a map (PR #157627)
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 05:24:00 PDT 2025
https://github.com/sparker-arm updated https://github.com/llvm/llvm-project/pull/157627
>From 7fb2ce40c14e14c9907c9a541262d57cd1549c94 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Tue, 9 Sep 2025 10:13:57 +0100
Subject: [PATCH 1/2] [NFC][Codegen] Convert LoadExtActions to a map
This allows actions to be stored per address space. The default address
space, zero, is used if one is not supplied.
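For illustration, a minimal sketch (not part of the patch itself) of how a
DAG-combine style caller is expected to use the new parameter; `TLI` and `N`
are assumed from a typical combiner context. If no table has been registered
for the load's address space, the lookup falls back to the address-space-zero
defaults.

  // Thread the load's address space through to the action table. A missing
  // per-address-space entry falls back to the default (addrspace 0) table.
  LoadSDNode *LN = cast<LoadSDNode>(N);
  if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, LN->getValueType(0),
                         LN->getMemoryVT(), LN->getAddressSpace())) {
    // ... safe to fold to a zero-extending load ...
  }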
---
llvm/include/llvm/CodeGen/TargetLowering.h | 45 ++++++++++++++--------
llvm/lib/CodeGen/TargetLoweringBase.cpp | 1 -
2 files changed, 28 insertions(+), 18 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 2ba8b29e775e0..451f3c73b2764 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1479,27 +1479,31 @@ class LLVM_ABI TargetLoweringBase {
/// Return how this load with extension should be treated: either it is legal,
/// needs to be promoted to a larger size, needs to be expanded to some other
/// code sequence, or the target has a custom expander for it.
- LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
- EVT MemVT) const {
+ LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT,
+ unsigned AddrSpace = 0) const {
if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!");
unsigned Shift = 4 * ExtType;
- return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
+ return (
+ LegalizeAction)((LoadExtActions.at(AddrSpace)[ValI][MemI] >> Shift) &
+ 0xf);
}
/// Return true if the specified load with extension is legal on this target.
- bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
- return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
+ bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT,
+ unsigned AddrSpace = 0) const {
+ return getLoadExtAction(ExtType, ValVT, MemVT, AddrSpace) == Legal;
}
/// Return true if the specified load with extension is legal or custom
/// on this target.
- bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
- return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
- getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
+ bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT,
+ unsigned AddrSpace = 0) const {
+ return getLoadExtAction(ExtType, ValVT, MemVT, AddrSpace) == Legal ||
+ getLoadExtAction(ExtType, ValVT, MemVT, AddrSpace) == Custom;
}
/// Same as getLoadExtAction, but for atomic loads.
@@ -2641,23 +2645,26 @@ class LLVM_ABI TargetLoweringBase {
/// Indicate that the specified load with extension does not work with the
/// specified type and indicate what to do about it.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
- LegalizeAction Action) {
+ LegalizeAction Action, unsigned AddrSpace = 0) {
assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
MemVT.isValid() && "Table isn't big enough!");
assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
unsigned Shift = 4 * ExtType;
- LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
- LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
+ LoadExtActions[AddrSpace][ValVT.SimpleTy][MemVT.SimpleTy] &=
+ ~((uint16_t)0xF << Shift);
+ LoadExtActions[AddrSpace][ValVT.SimpleTy][MemVT.SimpleTy] |=
+ (uint16_t)Action << Shift;
}
void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
- LegalizeAction Action) {
+ LegalizeAction Action, unsigned AddrSpace = 0) {
for (auto ExtType : ExtTypes)
- setLoadExtAction(ExtType, ValVT, MemVT, Action);
+ setLoadExtAction(ExtType, ValVT, MemVT, Action, AddrSpace);
}
void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT,
- ArrayRef<MVT> MemVTs, LegalizeAction Action) {
+ ArrayRef<MVT> MemVTs, LegalizeAction Action,
+ unsigned AddrSpace = 0) {
for (auto MemVT : MemVTs)
- setLoadExtAction(ExtTypes, ValVT, MemVT, Action);
+ setLoadExtAction(ExtTypes, ValVT, MemVT, Action, AddrSpace);
}
/// Let target indicate that an extending atomic load of the specified type
@@ -3748,8 +3755,12 @@ class LLVM_ABI TargetLoweringBase {
/// For each load extension type and each value type, keep a LegalizeAction
/// that indicates how instruction selection should deal with a load of a
/// specific value type and extension type. Uses 4-bits to store the action
- /// for each of the 4 load ext types.
- uint16_t LoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE];
+ /// for each of the 4 load ext types. These actions can be specified for each
+ /// address space.
+ using LoadExtActionMap =
+ std::map<unsigned, std::array<std::array<uint16_t, MVT::VALUETYPE_SIZE>,
+ MVT::VALUETYPE_SIZE>>;
+ LoadExtActionMap LoadExtActions;
/// Similar to LoadExtActions, but for atomic loads. Only Legal or Expand
/// (default) values are supported.
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index c23281a820b2b..5432d9673deaf 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -728,7 +728,6 @@ TargetLoweringBase::~TargetLoweringBase() = default;
void TargetLoweringBase::initActions() {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
- memset(LoadExtActions, 0, sizeof(LoadExtActions));
memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
memset(CondCodeActions, 0, sizeof(CondCodeActions));
>From c9c5d8205551a6cb8a3f9ea04a077b03b6b2fbdc Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Wed, 10 Sep 2025 12:11:43 +0100
Subject: [PATCH 2/2] hooking up dagcombine and legalize
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 +-
llvm/include/llvm/CodeGen/TargetLowering.h | 22 ++--
llvm/lib/CodeGen/CodeGenPrepare.cpp | 3 +-
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 103 +++++++++++-------
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 12 +-
.../SelectionDAG/LegalizeVectorOps.cpp | 3 +-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 3 +-
llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 +
.../CodeGen/AMDGPU/call-argument-types.ll | 5 -
llvm/test/CodeGen/AMDGPU/function-returns.ll | 2 +
.../AMDGPU/global-extload-gfx11plus.ll | 30 ++---
.../AMDGPU/infer-addrspace-flat-atomic.ll | 8 +-
llvm/test/CodeGen/AMDGPU/load-constant-i1.ll | 12 ++
llvm/test/CodeGen/AMDGPU/load-constant-i8.ll | 41 +++----
llvm/test/CodeGen/AMDGPU/load-global-i8.ll | 1 -
llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll | 6 +-
16 files changed, 142 insertions(+), 114 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index dce423fc1b18b..49a64e3d66f41 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1531,7 +1531,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
if (Opcode == Instruction::Store)
LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
else
- LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
+ LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT,
+ AddressSpace);
if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
// This is a vector load/store for some illegal type that is scalarized.
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 451f3c73b2764..2f9f6f8cec3c8 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1487,9 +1487,16 @@ class LLVM_ABI TargetLoweringBase {
assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!");
unsigned Shift = 4 * ExtType;
- return (
- LegalizeAction)((LoadExtActions.at(AddrSpace)[ValI][MemI] >> Shift) &
- 0xf);
+ if (LoadExtActions.count(AddrSpace)) {
+ return (
+ LegalizeAction)((LoadExtActions.at(AddrSpace)[ValI][MemI] >> Shift) &
+ 0xf);
+ } else {
+ assert(AddrSpace != 0 && "addrspace zero should be initialized");
+ return (
+ LegalizeAction)((LoadExtActions.at(0)[ValI][MemI] >> Shift) &
+ 0xf);
+ }
}
/// Return true if the specified load with extension is legal on this target.
@@ -2649,6 +2656,7 @@ class LLVM_ABI TargetLoweringBase {
assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
MemVT.isValid() && "Table isn't big enough!");
assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
+ assert(AddrSpace == 0 && "expected addrspace 0");
unsigned Shift = 4 * ExtType;
LoadExtActions[AddrSpace][ValVT.SimpleTy][MemVT.SimpleTy] &=
~((uint16_t)0xF << Shift);
@@ -3140,7 +3148,7 @@ class LLVM_ABI TargetLoweringBase {
LType = ISD::SEXTLOAD;
}
- return isLoadExtLegal(LType, VT, LoadVT);
+ return isLoadExtLegal(LType, VT, LoadVT, Load->getPointerAddressSpace());
}
/// Return true if any actual instruction that defines a value of type FromTy
@@ -3757,9 +3765,9 @@ class LLVM_ABI TargetLoweringBase {
/// specific value type and extension type. Uses 4-bits to store the action
/// for each of the 4 load ext types. These actions can be specified for each
/// address space.
- using LoadExtActionMap =
- std::map<unsigned, std::array<std::array<uint16_t, MVT::VALUETYPE_SIZE>,
- MVT::VALUETYPE_SIZE>>;
+ using LoadExtActionMapTy =
+ std::array<std::array<uint16_t, MVT::VALUETYPE_SIZE>, MVT::VALUETYPE_SIZE>;
+ using LoadExtActionMap = std::map<unsigned, LoadExtActionMapTy>;
LoadExtActionMap LoadExtActions;
/// Similar to LoadExtActions, but for atomic loads. Only Legal or Expand
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9db4c9e5e2807..e6d6c5f2aa738 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7328,7 +7328,8 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
// Reject cases that won't be matched as extloads.
if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
- !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
+ !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT,
+ Load->getPointerAddressSpace()))
return false;
IRBuilder<> Builder(Load->getNextNode());
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d130efe96b56b..a31c2ea5ca905 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6834,7 +6834,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
if (ExtVT == LoadedVT &&
(!LegalOperations ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT,
+ LoadN->getAddressSpace()))) {
// ZEXTLOAD will match without needing to change the size of the value being
// loaded.
return true;
@@ -6850,7 +6851,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
return false;
if (LegalOperations &&
- !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
+ !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT,
+ LoadN->getAddressSpace()))
return false;
if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
@@ -6913,7 +6915,8 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
return false;
if (LegalOperations &&
- !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
+ !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT,
+ Load->getAddressSpace()))
return false;
// For the transform to be legal, the load must produce only two values
@@ -7425,7 +7428,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
EVT LoadVT = MLoad->getMemoryVT();
EVT ExtVT = VT;
- if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
+ if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT,
+ MLoad->getAddressSpace())) {
// For this AND to be a zero extension of the masked load the elements
// of the BuildVec must mask the bottom bits of the extended element
// type
@@ -7576,9 +7580,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
// actually legal and isn't going to get expanded, else this is a false
// optimisation.
- bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
- Load->getValueType(0),
- Load->getMemoryVT());
+ bool CanZextLoadProfitably =
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, Load->getValueType(0),
+ Load->getMemoryVT(), Load->getAddressSpace());
// Resize the constant to the same size as the original memory access before
// extension. If it is still the AllOnesValue then this AND is completely
@@ -7770,7 +7774,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
if (DAG.MaskedValueIsZero(N1, ExtBits) &&
((!LegalOperations && LN0->isSimple()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT,
+ LN0->getAddressSpace()))) {
SDValue ExtLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
LN0->getBasePtr(), MemVT, LN0->getMemOperand());
@@ -9692,10 +9697,13 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Before legalize we can introduce too wide illegal loads which will be later
// split into legal sized loads. This enables us to combine i64 load by i8
// patterns to a couple of i32 loads on 32 bit targets.
- if (LegalOperations &&
- !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
- MemVT))
- return SDValue();
+ if (LegalOperations) {
+ for (auto L : Loads) {
+ if (!TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
+ MemVT, L->getAddressSpace()))
+ return SDValue();
+ }
+ }
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
@@ -13307,7 +13315,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
SetCCWidth != 1 && SetCCWidth < WideWidth &&
- TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
+ TLI.isLoadExtLegalOrCustom(
+ LoadExtOpcode, WideVT, NarrowVT,
+ cast<LoadSDNode>(LHS)->getAddressSpace()) &&
TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
// Both compare operands can be widened for free. The LHS can use an
// extended load, and the RHS is a constant:
@@ -13754,8 +13764,10 @@ static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
// Combine2), so we should conservatively check the OperationAction.
LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
- if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
- !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
+ if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT(),
+ Load1->getAddressSpace()) ||
+ !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT(),
+ Load2->getAddressSpace()) ||
(N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
return SDValue();
@@ -13979,13 +13991,15 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
// Try to split the vector types to get down to legal types.
EVT SplitSrcVT = SrcVT;
EVT SplitDstVT = DstVT;
- while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
+ while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT,
+ LN0->getAddressSpace()) &&
SplitSrcVT.getVectorNumElements() > 1) {
SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
}
- if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
+ if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT,
+ LN0->getAddressSpace()))
return SDValue();
assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
@@ -14058,7 +14072,7 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
return SDValue();
LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
EVT MemVT = Load->getMemoryVT();
- if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
+ if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT, Load->getAddressSpace()) ||
Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
return SDValue();
@@ -14168,7 +14182,7 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
EVT MemVT = LN0->getMemoryVT();
if ((LegalOperations || !LN0->isSimple() ||
VT.isVector()) &&
- !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
+ !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT, LN0->getAddressSpace()))
return SDValue();
SDValue ExtLoad =
@@ -14210,12 +14224,14 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
}
}
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
// TODO: isFixedLengthVector() should be removed and any negative effects on
// code generation being the result of that target's implementation of
// isVectorLoadExtDesirable().
if ((LegalOperations || VT.isFixedLengthVector() ||
- !cast<LoadSDNode>(N0)->isSimple()) &&
- !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
+ LN0->isSimple()) &&
+ !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType(),
+ LN0->getAddressSpace()))
return {};
bool DoXform = true;
@@ -14227,7 +14243,6 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
if (!DoXform)
return {};
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
@@ -14258,7 +14273,8 @@ tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
return SDValue();
if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
- !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
+ !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0),
+ Ld->getAddressSpace()))
return SDValue();
if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
@@ -14402,7 +14418,8 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
if (!(ISD::isNON_EXTLoad(V.getNode()) &&
ISD::isUNINDEXEDLoad(V.getNode()) &&
cast<LoadSDNode>(V)->isSimple() &&
- TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
+ TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType(),
+ cast<LoadSDNode>(V)->getAddressSpace())))
return false;
// Non-chain users of this value must either be the setcc in this
@@ -14599,7 +14616,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
EVT MemVT = LN00->getMemoryVT();
- if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
+ if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT, LN00->getAddressSpace()) &&
LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
SmallVector<SDNode*, 4> SetCCs;
bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
@@ -14917,7 +14934,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
EVT MemVT = LN00->getMemoryVT();
- if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
+ if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT, LN00->getAddressSpace()) &&
LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
@@ -15148,7 +15165,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return foldedExt;
} else if (ISD::isNON_EXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
- TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
+ TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType(),
+ cast<LoadSDNode>(N0)->getAddressSpace())) {
bool DoXform = true;
SmallVector<SDNode *, 4> SetCCs;
if (!N0.hasOneUse())
@@ -15183,7 +15201,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
ISD::LoadExtType ExtType = LN0->getExtensionType();
EVT MemVT = LN0->getMemoryVT();
- if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
+ if (!LegalOperations ||
+ TLI.isLoadExtLegal(ExtType, VT, MemVT, LN0->getAddressSpace())) {
SDValue ExtLoad =
DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
@@ -15497,7 +15516,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
// If the mask is smaller, recompute the type.
if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
- TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
+ TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT,
+ LN->getAddressSpace()))
ExtVT = MaskedVT;
} else if (ExtType == ISD::ZEXTLOAD &&
ShiftMask.isShiftedMask(Offset, ActiveBits) &&
@@ -15506,7 +15526,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
// If the mask is shifted we can use a narrower load and a shl to insert
// the trailing zeros.
if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
- TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
+ TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT,
+ LN->getAddressSpace())) {
ExtVT = MaskedVT;
ShAmt = Offset + ShAmt;
ShiftedOffset = Offset;
@@ -15732,7 +15753,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
N0.hasOneUse()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT,
+ cast<LoadSDNode>(N0)->getAddressSpace()))) {
auto *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad =
DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
@@ -15747,7 +15769,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT,
+ cast<LoadSDNode>(N0)->getAddressSpace()))) {
auto *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad =
DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
@@ -15762,7 +15785,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT,
+ Ld->getAddressSpace())) {
SDValue ExtMaskedLoad = DAG.getMaskedLoad(
VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
@@ -19109,7 +19133,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
+ TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType(),
+ cast<LoadSDNode>(N0)->getAddressSpace())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
LN0->getChain(),
@@ -22161,12 +22186,16 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
} else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
+ unsigned AS = LoadNodes[i].MemNode->getAddressSpace();
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
DAG.getMachineFunction()) &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy,
+ AS) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy,
+ AS) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy,
+ AS) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bcfc2c5dc9f83..f66ab797fea83 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -742,8 +742,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// nice to have an effective generic way of getting these benefits...
// Until such a way is found, don't insist on promoting i1 here.
(SrcVT != MVT::i1 ||
- TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) ==
- TargetLowering::Promote)) {
+ TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1,
+ LD->getAddressSpace()) == TargetLowering::Promote)) {
// Promote to a byte-sized load if not loading an integral number of
// bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
unsigned NewWidth = SrcVT.getStoreSizeInBits();
@@ -856,7 +856,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
} else {
bool isCustom = false;
switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0),
- SrcVT.getSimpleVT())) {
+ SrcVT.getSimpleVT(), LD->getAddressSpace())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
@@ -884,13 +884,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
case TargetLowering::Expand: {
EVT DestVT = Node->getValueType(0);
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT,
+ LD->getAddressSpace())) {
// If the source type is not legal, see if there is a legal extload to
// an intermediate type that we can then extend further.
EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
if ((LoadVT.isFloatingPoint() == SrcVT.isFloatingPoint()) &&
(TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
- TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT))) {
+ TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT,
+ LD->getAddressSpace()))) {
// If we are loading a legal type, this is a non-extload followed by a
// full extend.
ISD::LoadExtType MidExtType =
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 8e423c4f83b38..be8e780a6f55d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -301,7 +301,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
ISD::LoadExtType ExtType = LD->getExtensionType();
EVT LoadedVT = LD->getMemoryVT();
if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
- Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
+ Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT,
+ LD->getAddressSpace());
break;
}
case ISD::STORE: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index fd6d20e146bb2..78d8353a5901e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12364,7 +12364,8 @@ SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
if (ResultVT.bitsGT(VecEltVT)) {
// If the result type of vextract is wider than the load, then issue an
// extending load instead.
- ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
+ ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT,
+ OriginalLoad->getAddressSpace())
? ISD::ZEXTLOAD
: ISD::EXTLOAD;
Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 5432d9673deaf..e9cdb72dd7eb2 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -731,6 +731,8 @@ void TargetLoweringBase::initActions() {
memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
memset(CondCodeActions, 0, sizeof(CondCodeActions));
+ LoadExtActions[0].fill({});
+
llvm::fill(RegClassForVT, nullptr);
llvm::fill(TargetDAGCombineArray, 0);
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index b8dd377377dab..0d1c452db019b 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -279,7 +279,6 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext at rel32@hi+12
; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
-; VI-NEXT: v_and_b32_e32 v0, 1, v0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
@@ -302,7 +301,6 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext at rel32@hi+12
; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
-; CI-NEXT: v_and_b32_e32 v0, 1, v0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
@@ -325,7 +323,6 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext at rel32@hi+12
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
;
@@ -340,7 +337,6 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1_zeroext at rel32@lo+4
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1_zeroext at rel32@hi+12
; GFX11-NEXT: s_mov_b32 s32, 0
-; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT: s_endpgm
;
@@ -360,7 +356,6 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1_zeroext at rel32@hi+12
; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
; HSA-NEXT: s_mov_b32 s32, 0
-; HSA-NEXT: v_and_b32_e32 v0, 1, v0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
%var = load volatile i1, ptr addrspace(1) poison
diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll
index 38003f6075c35..0514b1cb38e1c 100644
--- a/llvm/test/CodeGen/AMDGPU/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll
@@ -36,6 +36,7 @@ define zeroext i1 @i1_zeroext_func_void() #0 {
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
+; GFX789-NEXT: v_and_b32_e32 v0, 1, v0
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_zeroext_func_void:
@@ -45,6 +46,7 @@ define zeroext i1 @i1_zeroext_func_void() #0 {
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i1, ptr addrspace(1) poison
ret i1 %val
diff --git a/llvm/test/CodeGen/AMDGPU/global-extload-gfx11plus.ll b/llvm/test/CodeGen/AMDGPU/global-extload-gfx11plus.ll
index f92ba7a8978b9..a0a760133e5c0 100644
--- a/llvm/test/CodeGen/AMDGPU/global-extload-gfx11plus.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-extload-gfx11plus.ll
@@ -55,27 +55,15 @@ define amdgpu_kernel void @sextload_global_i8_to_i16(ptr addrspace(1) %out, ptr
}
define amdgpu_kernel void @zextload_global_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
-; GFX11-REAL16-LABEL: zextload_global_i8_to_i64:
-; GFX11-REAL16: ; %bb.0:
-; GFX11-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-REAL16-NEXT: v_mov_b32_e32 v1, 0
-; GFX11-REAL16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-REAL16-NEXT: global_load_d16_u8 v0, v1, s[2:3]
-; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-REAL16-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-REAL16-NEXT: global_store_b64 v1, v[0:1], s[0:1]
-; GFX11-REAL16-NEXT: s_endpgm
-;
-; GFX11-FAKE16-LABEL: zextload_global_i8_to_i64:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0
-; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-FAKE16-NEXT: global_load_u8 v0, v1, s[2:3]
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-FAKE16-NEXT: global_store_b64 v1, v[0:1], s[0:1]
-; GFX11-FAKE16-NEXT: s_endpgm
+; GFX11-LABEL: zextload_global_i8_to_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_u8 v0, v1, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b64 v1, v[0:1], s[0:1]
+; GFX11-NEXT: s_endpgm
%a = load i8, ptr addrspace(1) %in
%ext = zext i8 %a to i64
store i64 %ext, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/infer-addrspace-flat-atomic.ll b/llvm/test/CodeGen/AMDGPU/infer-addrspace-flat-atomic.ll
index 59dfd713ef4fd..a06692dce9f0d 100644
--- a/llvm/test/CodeGen/AMDGPU/infer-addrspace-flat-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/infer-addrspace-flat-atomic.ll
@@ -4,11 +4,11 @@
define protected amdgpu_kernel void @InferNothing(i32 %a, ptr %b, double %c) {
; CHECK-LABEL: InferNothing:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_load_dword s6, s[4:5], 0x24
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
+; CHECK-NEXT: s_load_dword s6, s[4:5], 0x24
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_ashr_i32 s7, s6, 31
; CHECK-NEXT: v_mov_b32_e32 v2, s2
+; CHECK-NEXT: s_ashr_i32 s7, s6, 31
; CHECK-NEXT: v_mov_b32_e32 v3, s3
; CHECK-NEXT: s_lshl_b64 s[2:3], s[6:7], 3
; CHECK-NEXT: s_add_u32 s0, s2, s0
@@ -31,11 +31,11 @@ entry:
define protected amdgpu_kernel void @InferFadd(i32 %a, ptr addrspace(1) %b, double %c) {
; CHECK-LABEL: InferFadd:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_load_dword s6, s[4:5], 0x24
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
+; CHECK-NEXT: s_load_dword s6, s[4:5], 0x24
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_ashr_i32 s7, s6, 31
; CHECK-NEXT: v_mov_b32_e32 v2, s2
+; CHECK-NEXT: s_ashr_i32 s7, s6, 31
; CHECK-NEXT: v_mov_b32_e32 v3, s3
; CHECK-NEXT: s_lshl_b64 s[2:3], s[6:7], 3
; CHECK-NEXT: s_add_u32 s0, s0, s2
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
index 5b2213592f495..770f7e06c383f 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@@ -645,6 +645,7 @@ define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, pt
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
@@ -658,6 +659,7 @@ define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, pt
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_and_b32_e32 v2, 1, v2
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
@@ -683,6 +685,8 @@ define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, pt
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_and_b32 s2, s2, 1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
@@ -693,6 +697,8 @@ define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, pt
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_and_b32 s2, s2, 1
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
@@ -796,6 +802,7 @@ define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
@@ -809,6 +816,7 @@ define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_and_b32_e32 v2, 1, v2
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
@@ -834,6 +842,8 @@ define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_and_b32 s2, s2, 1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
@@ -844,6 +854,8 @@ define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_and_b32 s2, s2, 1
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
index b534c2c267fad..d935d0f28e442 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -5285,16 +5285,15 @@ define amdgpu_kernel void @constant_zextload_i8_to_i64(ptr addrspace(1) %out, pt
; GFX8-NOHSA-LABEL: constant_zextload_i8_to_i64:
; GFX8-NOHSA: ; %bb.0:
; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, 0
; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3
-; GFX8-NOHSA-NEXT: flat_load_ubyte v2, v[0:1]
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NOHSA-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s1
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NOHSA-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NOHSA-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_i8_to_i64:
@@ -5314,27 +5313,15 @@ define amdgpu_kernel void @constant_zextload_i8_to_i64(ptr addrspace(1) %out, pt
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
-; GFX12-TRUE16-LABEL: constant_zextload_i8_to_i64:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-TRUE16-NEXT: v_mov_b32_e32 v1, 0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: global_load_d16_u8 v0, v1, s[2:3]
-; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX12-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX12-TRUE16-NEXT: global_store_b64 v1, v[0:1], s[0:1]
-; GFX12-TRUE16-NEXT: s_endpgm
-;
-; GFX12-FAKE16-LABEL: constant_zextload_i8_to_i64:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-FAKE16-NEXT: v_mov_b32_e32 v1, 0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: global_load_u8 v0, v1, s[2:3]
-; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
-; GFX12-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX12-FAKE16-NEXT: global_store_b64 v1, v[0:1], s[0:1]
-; GFX12-FAKE16-NEXT: s_endpgm
+; GFX12-LABEL: constant_zextload_i8_to_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1]
+; GFX12-NEXT: s_endpgm
%a = load i8, ptr addrspace(4) %in
%ext = zext i8 %a to i64
store i64 %ext, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
index f879dc660203f..a5be0a312898b 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
@@ -5116,7 +5116,6 @@ define amdgpu_kernel void @global_zextload_i8_to_i64(ptr addrspace(1) %out, ptr
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
index 04a5cac116d78..d4b9f33e961d5 100644
--- a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
@@ -542,7 +542,7 @@ define amdgpu_kernel void @widen_i1_zext_to_i64_constant_load(ptr addrspace(4) %
; SI-NEXT: s_load_dword s2, s[0:1], 0x0
; SI-NEXT: s_mov_b64 s[0:1], 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_and_b32 s2, s2, 0xff
+; SI-NEXT: s_and_b32 s2, s2, 1
; SI-NEXT: s_addk_i32 s2, 0x3e7
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: s_mov_b32 s2, -1
@@ -558,7 +558,7 @@ define amdgpu_kernel void @widen_i1_zext_to_i64_constant_load(ptr addrspace(4) %
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_and_b32 s0, s0, 0xff
+; VI-NEXT: s_and_b32 s0, s0, 1
; VI-NEXT: s_addk_i32 s0, 0x3e7
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
@@ -571,7 +571,7 @@ define amdgpu_kernel void @widen_i1_zext_to_i64_constant_load(ptr addrspace(4) %
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_and_b32 s0, s0, 0xff
+; GFX11-NEXT: s_and_b32 s0, s0, 1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_addk_i32 s0, 0x3e7
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0