[llvm] 7ce3d92 - [Hexagon] Improve safety of aligned loads/stores in HVC
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Wed May 17 04:34:13 PDT 2023
Author: Krzysztof Parzyszek
Date: 2023-05-17T04:33:53-07:00
New Revision: 7ce3d9228ff1aa881a7aebd451db36dcf159468a
URL: https://github.com/llvm/llvm-project/commit/7ce3d9228ff1aa881a7aebd451db36dcf159468a
DIFF: https://github.com/llvm/llvm-project/commit/7ce3d9228ff1aa881a7aebd451db36dcf159468a.diff
LOG: [Hexagon] Improve safety of aligned loads/stores in HVC
Generate a predicated variant of the last load/store in a group to
avoid accessing out-of-bounds (OOB) memory. Disable vector alignment on
HVX targets prior to v62, since predicated vector loads were only
introduced in v62.
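For context, a minimal LLVM IR sketch of the new pattern (the function and
value names here are hypothetical; the intrinsic signature and the 128-byte
HVX geometry follow the updated test checks further below):

; Only the last sector of a realigned group is predicated; the in-bounds
; sectors keep using plain aligned loads.
declare <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1, ptr, i32)

define <32 x i32> @last_sector(ptr %aligned_base, i32 %addr) {
  ; makeTestIfUnaligned: the predicate is true only when the original address
  ; is not already 128-byte aligned, i.e. when the extra sector is needed.
  %rem = and i32 %addr, 127
  %unaligned = icmp ne i32 %rem, 0
  %v = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 %unaligned, ptr %aligned_base, i32 0)
  ret <32 x i32> %v
}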
Added:
Modified:
llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
llvm/test/CodeGen/Hexagon/autohvx/vector-align-basic.ll
llvm/test/CodeGen/Hexagon/autohvx/vector-align-store-mask.ll
llvm/test/CodeGen/Hexagon/autohvx/vector-align-store.ll
llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 03bc817c629b8..3023431c713da 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -34,6 +34,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -56,6 +57,8 @@
using namespace llvm;
namespace {
+cl::opt<bool> AlignFullHvxStores("hvc-align-full-hvx-stores", cl::Hidden);
+
class HexagonVectorCombine {
public:
HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
@@ -79,10 +82,14 @@ class HexagonVectorCombine {
ConstantInt *getConstInt(int Val, unsigned Width = 32) const;
// Get the integer value of V, if it exists.
std::optional<APInt> getIntValue(const Value *Val) const;
- // Is V a constant 0, or a vector of 0s?
+ // Is Val a constant 0, or a vector of 0s?
bool isZero(const Value *Val) const;
- // Is V an undef value?
+ // Is Val an undef value?
bool isUndef(const Value *Val) const;
+ // Is Val a scalar (i1 true) or a vector of (i1 true)?
+ bool isTrue(const Value *Val) const;
+ // Is Val a scalar (i1 false) or a vector of (i1 false)?
+ bool isFalse(const Value *Val) const;
// Get HVX vector type with the given element type.
VectorType *getHvxTy(Type *ElemTy, bool Pair = false) const;
@@ -125,7 +132,8 @@ class HexagonVectorCombine {
Value *createHvxIntrinsic(IRBuilderBase &Builder, Intrinsic::ID IntID,
Type *RetTy, ArrayRef<Value *> Args,
- ArrayRef<Type *> ArgTys = std::nullopt) const;
+ ArrayRef<Type *> ArgTys = std::nullopt,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
SmallVector<Value *> splitVectorElements(IRBuilderBase &Builder, Value *Vec,
unsigned ToWidth) const;
Value *joinVectorElements(IRBuilderBase &Builder, ArrayRef<Value *> Values,
@@ -234,6 +242,7 @@ class AlignVectors {
int size() const { return Blocks.size(); }
Block &operator[](int i) { return Blocks[i]; }
+ const Block &operator[](int i) const { return Blocks[i]; }
std::vector<Block> Blocks;
@@ -259,10 +268,29 @@ class AlignVectors {
int Adjust) const;
Value *createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
int Alignment) const;
- Value *createAlignedLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
- int Alignment, Value *Mask, Value *PassThru) const;
- Value *createAlignedStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
- int Alignment, Value *Mask) const;
+
+ Value *createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
+ Value *Predicate, int Alignment, Value *Mask,
+ Value *PassThru,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
+ Value *createSimpleLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
+ int Alignment,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
+
+ Value *createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
+ Value *Predicate, int Alignment, Value *Mask,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
+ Value *createSimpleStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
+ int Alignment,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
+
+ Value *createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
+ Value *Predicate, int Alignment,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
+ Value *
+ createPredicatedStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
+ Value *Predicate, int Alignment,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
DepList getUpwardDeps(Instruction *In, Instruction *Base) const;
bool createAddressGroups();
@@ -275,6 +303,9 @@ class AlignVectors {
int ScLen, Value *AlignVal, Value *AlignAddr) const;
bool realignGroup(const MoveGroup &Move) const;
+ Value *makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
+ int Alignment) const;
+
friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan::Block &B);
@@ -596,26 +627,137 @@ auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
return Builder.CreateIntToPtr(And, ValTy->getPointerTo(), "itp");
}
-auto AlignVectors::createAlignedLoad(IRBuilderBase &Builder, Type *ValTy,
- Value *Ptr, int Alignment, Value *Mask,
- Value *PassThru) const -> Value * {
+auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
+ Value *Predicate, int Alignment, Value *Mask,
+ Value *PassThru,
+ ArrayRef<Value *> MDSources) const -> Value * {
+ bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
+ // Predicate is nullptr if we are not creating a predicated load.
+ if (Predicate) {
+ assert(!Predicate->getType()->isVectorTy() &&
+ "Expectning scalar predicate");
+ if (HVC.isFalse(Predicate))
+ return UndefValue::get(ValTy);
+ if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
+ Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
+ Alignment, MDSources);
+ return Builder.CreateSelect(Mask, Load, PassThru);
+ }
+ // Predicate == true here.
+ }
assert(!HVC.isUndef(Mask)); // Should this be allowed?
if (HVC.isZero(Mask))
return PassThru;
- if (Mask == ConstantInt::getTrue(Mask->getType()))
- return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
- return Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment), Mask, PassThru,
- "mld");
+ if (HVC.isTrue(Mask))
+ return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
+
+ Instruction *Load = Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment),
+ Mask, PassThru, "mld");
+ propagateMetadata(Load, MDSources);
+ return Load;
}
-auto AlignVectors::createAlignedStore(IRBuilderBase &Builder, Value *Val,
- Value *Ptr, int Alignment,
- Value *Mask) const -> Value * {
+auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
+ Value *Ptr, int Alignment,
+ ArrayRef<Value *> MDSources) const
+ -> Value * {
+ Instruction *Load =
+ Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
+ propagateMetadata(Load, MDSources);
+ return Load;
+}
+
+auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
+ Value *Ptr, Value *Predicate,
+ int Alignment,
+ ArrayRef<Value *> MDSources) const
+ -> Value * {
+ assert(HVC.HST.isTypeForHVX(ValTy) &&
+ "Predicates 'scalar' vector loads not yet supported");
+ assert(Predicate);
+ assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
+ assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
+ if (HVC.isFalse(Predicate))
+ return UndefValue::get(ValTy);
+ if (HVC.isTrue(Predicate))
+ return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
+
+ auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
+ // FIXME: This may not put the offset from Ptr into the vmem offset.
+ return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
+ {Predicate, Ptr, HVC.getConstInt(0)},
+ std::nullopt, MDSources);
+}
+
+auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
+ Value *Predicate, int Alignment, Value *Mask,
+ ArrayRef<Value *> MDSources) const -> Value * {
if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
return UndefValue::get(Val->getType());
- if (Mask == ConstantInt::getTrue(Mask->getType()))
- return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
- return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
+ assert(!Predicate || (!Predicate->getType()->isVectorTy() &&
+ "Expectning scalar predicate"));
+ if (Predicate) {
+ if (HVC.isFalse(Predicate))
+ return UndefValue::get(Val->getType());
+ if (HVC.isTrue(Predicate))
+ Predicate = nullptr;
+ }
+ // Here both Predicate and Mask are true or unknown.
+
+ if (HVC.isTrue(Mask)) {
+ if (Predicate) { // Predicate unknown
+ return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
+ MDSources);
+ }
+ // Predicate is true:
+ return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
+ }
+
+ // Mask is unknown
+ if (!Predicate) {
+ Instruction *Store =
+ Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
+ propagateMetadata(Store, MDSources);
+ return Store;
+ }
+
+ // Both Predicate and Mask are unknown.
+ // Emulate masked store with predicated-load + mux + predicated-store.
+ Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
+ Predicate, Alignment, MDSources);
+ Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
+ return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
+ MDSources);
+}
+
+auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
+ Value *Ptr, int Alignment,
+ ArrayRef<Value *> MDSources) const
+ -> Value * {
+ Instruction *Store = Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
+ propagateMetadata(Store, MDSources);
+ return Store;
+}
+
+auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
+ Value *Ptr, Value *Predicate,
+ int Alignment,
+ ArrayRef<Value *> MDSources) const
+ -> Value * {
+ assert(HVC.HST.isTypeForHVX(Val->getType()) &&
+ "Predicates 'scalar' vector stores not yet supported");
+ assert(Predicate);
+ if (HVC.isFalse(Predicate))
+ return UndefValue::get(Val->getType());
+ if (HVC.isTrue(Predicate))
+ return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
+
+ assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
+ auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
+ // FIXME: This may not put the offset from Ptr into the vmem offset.
+ return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
+ {Predicate, Ptr, HVC.getConstInt(0), Val},
+ std::nullopt, MDSources);
}
auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
@@ -739,6 +881,11 @@ auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
// Erase singleton groups.
erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
+
+ // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
+ if (!HVC.HST.useHVXV62Ops())
+ erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });
+
return LoadGroups;
}
@@ -780,6 +927,25 @@ auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
// Erase singleton groups.
erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
+
+ // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
+ if (!HVC.HST.useHVXV62Ops())
+ erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
+
+ // Erase groups where every store is a full HVX vector. The reason is that
+ // aligning predicated stores generates complex code that may be less
+ // efficient than a sequence of unaligned vector stores.
+ if (!AlignFullHvxStores) {
+ erase_if(StoreGroups, [this](const MoveGroup &G) {
+ return G.IsHvx && llvm::all_of(G.Main, [this](Instruction *S) {
+ auto MaybeInfo = this->getAddrInfo(*S);
+ assert(MaybeInfo.has_value());
+ return HVC.HST.isHVXVectorType(
+ EVT::getEVT(MaybeInfo->ValTy, false));
+ });
+ });
+ }
+
return StoreGroups;
}
@@ -901,17 +1067,18 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
});
auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
- int Index) {
+ int Index, bool MakePred) {
Value *Ptr =
createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
- // FIXME: generate a predicated load?
- Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef);
+ Value *Predicate =
+ MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
+
// If vector shifting is potentially needed, accumulate metadata
// from source sections of twice the load width.
int Start = (Index - DoAlign) * ScLen;
int Width = (1 + DoAlign) * ScLen;
- propagateMetadata(cast<Instruction>(Load),
- VSpan.section(Start, Width).values());
+ Value *Load = this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True,
+ Undef, VSpan.section(Start, Width).values());
return cast<Instruction>(Load);
};
@@ -939,7 +1106,8 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
Builder.SetInsertPoint(Where);
- Loads[Index] = createLoad(Builder, VSpan, Index);
+ Loads[Index] =
+ createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
// We know it's safe to put the load at BasePos, but we'd prefer to put
// it at "Where". To see if the load is safe to be placed at Where, put
// it there first and then check if it's safe to move it to BasePos.
@@ -1036,10 +1204,11 @@ auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
// Create an extra "undef" sector at the beginning and at the end.
// They will be used as the left/right filler in the vlalign step.
- for (int i = (DoAlign ? -1 : 0); i != NumSectors + DoAlign; ++i) {
+ for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {
// For stores, the size of each section is an aligned vector length.
// Adjust the store offsets relative to the section start offset.
- ByteSpan VSection = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
+ ByteSpan VSection =
+ VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
Value *AccumV = UndefValue::get(SecTy);
Value *AccumM = HVC.getNullValue(SecTy);
for (ByteSpan::Block &S : VSection) {
@@ -1051,35 +1220,43 @@ auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay),
S.Seg.Start, S.Seg.Size, S.Pos);
}
- ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
- ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);
+ ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
+ ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
}
// vlalign
if (DoAlign) {
- for (int j = 1; j != NumSectors + 2; ++j) {
- Value *PrevV = ASpanV[j - 1].Seg.Val, *ThisV = ASpanV[j].Seg.Val;
- Value *PrevM = ASpanM[j - 1].Seg.Val, *ThisM = ASpanM[j].Seg.Val;
+ for (int Index = 1; Index != NumSectors + 2; ++Index) {
+ Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
+ Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
assert(isSectorTy(PrevV->getType()) && isSectorTy(PrevM->getType()));
- ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
- ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
+ ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
+ ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
}
}
- for (int i = 0; i != NumSectors + DoAlign; ++i) {
- Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
- Value *Val = ASpanV[i].Seg.Val;
- Value *Mask = ASpanM[i].Seg.Val; // bytes
- if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) {
- Value *Store =
- createAlignedStore(Builder, Val, Ptr, ScLen, HVC.vlsb(Builder, Mask));
- // If vector shifting is potentially needed, accumulate metadata
- // from source sections of twice the store width.
- int Start = (i - DoAlign) * ScLen;
- int Width = (1 + DoAlign) * ScLen;
- propagateMetadata(cast<Instruction>(Store),
- VSpan.section(Start, Width).values());
- }
+ auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
+ const ByteSpan &ASpanM, int Index, bool MakePred) {
+ Value *Val = ASpanV[Index].Seg.Val;
+ Value *Mask = ASpanM[Index].Seg.Val; // bytes
+ if (HVC.isUndef(Val) || HVC.isZero(Mask))
+ return;
+ Value *Ptr =
+ createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
+ Value *Predicate =
+ MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
+
+ // If vector shifting is potentially needed, accumulate metadata
+ // from source sections of twice the store width.
+ int Start = (Index - DoAlign) * ScLen;
+ int Width = (1 + DoAlign) * ScLen;
+ this->createStore(Builder, Val, Ptr, Predicate, ScLen,
+ HVC.vlsb(Builder, Mask),
+ VSpan.section(Start, Width).values());
+ };
+
+ for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
+ createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
}
}
@@ -1209,6 +1386,15 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
return true;
}
+auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
+ int Alignment) const -> Value * {
+ auto *AlignTy = AlignVal->getType();
+ Value *And =
+ Builder.CreateAnd(AlignVal, ConstantInt::get(AlignTy, Alignment - 1));
+ Value *Zero = ConstantInt::get(AlignTy, 0);
+ return Builder.CreateICmpNE(And, Zero, "isz");
+}
+
auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
if (!HVC.isByteVecTy(Ty))
return false;
@@ -1859,6 +2045,14 @@ auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
return isa<UndefValue>(Val);
}
+auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {
+ return Val == ConstantInt::getTrue(Val->getType());
+}
+
+auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {
+ return isZero(Val);
+}
+
auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
-> VectorType * {
EVT ETy = EVT::getEVT(ElemTy, false);
@@ -2204,7 +2398,8 @@ auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
Intrinsic::ID IntID, Type *RetTy,
ArrayRef<Value *> Args,
- ArrayRef<Type *> ArgTys) const
+ ArrayRef<Type *> ArgTys,
+ ArrayRef<Value *> MDSources) const
-> Value * {
auto getCast = [&](IRBuilderBase &Builder, Value *Val,
Type *DestTy) -> Value * {
@@ -2242,7 +2437,12 @@ auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
IntrArgs.push_back(A);
}
}
- Value *Call = Builder.CreateCall(IntrFn, IntrArgs, "cup");
+ StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
+ CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);
+
+ MemoryEffects ME = Call->getAttributes().getMemoryEffects();
+ if (!ME.doesNotAccessMemory() && !ME.onlyAccessesInaccessibleMem())
+ propagateMetadata(Call, MDSources);
Type *CallTy = Call->getType();
if (RetTy == nullptr || CallTy == RetTy)
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-basic.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-basic.ll
index fd679b858847a..63c15deb799e9 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-basic.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-basic.ll
@@ -62,6 +62,6 @@ declare <32 x i32> @llvm.masked.load.v32i32.p0(ptr, i32 immarg, <32 x i1>, <32 x
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.masked.store.v32i32.p0(<32 x i32>, ptr, i32 immarg, <32 x i1>) #2
-attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b,-packets" }
+attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length128b,-packets" }
attributes #1 = { argmemonly nounwind readonly willreturn }
attributes #2 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-store-mask.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-store-mask.ll
index 4124cdf7412eb..9726fe88eb5f1 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-store-mask.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-store-mask.ll
@@ -31,4 +31,4 @@ b2: ; preds = %b1
ret void
}
-attributes #0 = { "target-features"="+hvxv66,+hvx-length128b" }
+attributes #0 = { "target-features"="+hvxv66,+hvx-length128b,-packets" }
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-store.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-store.ll
index 6a79f3dd152bc..9d10551a6d014 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-store.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-store.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -hvc-align-full-hvx-stores < %s | FileCheck %s
; Make sure we generate 3 aligned stores.
; CHECK: vmem({{.*}}) =
@@ -17,4 +17,4 @@ b0:
ret void
}
-attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" }
+attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length128b" }
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll
index 51aaa80b143a1..a931d14620a54 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=hexagon -S -hexagon-vc -instcombine < %s | FileCheck %s
+; RUN: opt -mtriple=hexagon -S -hexagon-vc -instcombine -hvc-align-full-hvx-stores < %s | FileCheck %s
; Check that Hexagon Vector Combine propagates (TBAA) metadata to the
; generated output. (Use instcombine to clean the output up a bit.)
@@ -18,18 +18,20 @@ define <64 x i16> @f0(ptr %a0, i32 %a1) #0 {
; CHECK-NEXT: [[ADD:%.*]] = and i32 [[PTI]], -128
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[ADD]] to ptr
; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr [[V1]] to i32
-; CHECK-NEXT: [[ALD13:%.*]] = load <32 x i32>, ptr [[ITP]], align 128, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[ALD14:%.*]] = load <32 x i32>, ptr [[ITP]], align 128, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[ITP]], i32 128
; CHECK-NEXT: [[ALD2:%.*]] = load <128 x i8>, ptr [[GEP]], align 128, !tbaa [[TBAA0]]
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
-; CHECK-NEXT: [[ALD414:%.*]] = load <32 x i32>, ptr [[GEP3]], align 128, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[CST:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
-; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST]], <32 x i32> [[ALD13]], i32 [[PTI1]])
-; CHECK-NEXT: [[CST11:%.*]] = bitcast <32 x i32> [[CUP]] to <64 x i16>
-; CHECK-NEXT: [[CST8:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
-; CHECK-NEXT: [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[ALD414]], <32 x i32> [[CST8]], i32 [[PTI1]])
-; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP9]] to <64 x i16>
-; CHECK-NEXT: [[V8:%.*]] = add <64 x i16> [[CST11]], [[CST12]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
+; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP3]], i32 0), !tbaa [[TBAA0]]
+; CHECK-NEXT: [[CST4:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
+; CHECK-NEXT: [[CUP6:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST4]], <32 x i32> [[ALD14]], i32 [[PTI1]])
+; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP6]] to <64 x i16>
+; CHECK-NEXT: [[CST9:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
+; CHECK-NEXT: [[CUP10:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CUP]], <32 x i32> [[CST9]], i32 [[PTI1]])
+; CHECK-NEXT: [[CST13:%.*]] = bitcast <32 x i32> [[CUP10]] to <64 x i16>
+; CHECK-NEXT: [[V8:%.*]] = add <64 x i16> [[CST12]], [[CST13]]
; CHECK-NEXT: ret <64 x i16> [[V8]]
;
b0:
@@ -54,18 +56,20 @@ define <64 x i16> @f1(ptr %a0, i32 %a1) #0 {
; CHECK-NEXT: [[ADD:%.*]] = and i32 [[PTI]], -128
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[ADD]] to ptr
; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr [[V1]] to i32
-; CHECK-NEXT: [[ALD13:%.*]] = load <32 x i32>, ptr [[ITP]], align 128, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[ALD14:%.*]] = load <32 x i32>, ptr [[ITP]], align 128, !tbaa [[TBAA0]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[ITP]], i32 128
; CHECK-NEXT: [[ALD2:%.*]] = load <128 x i8>, ptr [[GEP]], align 128
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
-; CHECK-NEXT: [[ALD414:%.*]] = load <32 x i32>, ptr [[GEP3]], align 128
-; CHECK-NEXT: [[CST:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
-; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST]], <32 x i32> [[ALD13]], i32 [[PTI1]])
-; CHECK-NEXT: [[CST11:%.*]] = bitcast <32 x i32> [[CUP]] to <64 x i16>
-; CHECK-NEXT: [[CST8:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
-; CHECK-NEXT: [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[ALD414]], <32 x i32> [[CST8]], i32 [[PTI1]])
-; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP9]] to <64 x i16>
-; CHECK-NEXT: [[V8:%.*]] = add <64 x i16> [[CST11]], [[CST12]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
+; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP3]], i32 0)
+; CHECK-NEXT: [[CST4:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
+; CHECK-NEXT: [[CUP6:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST4]], <32 x i32> [[ALD14]], i32 [[PTI1]])
+; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP6]] to <64 x i16>
+; CHECK-NEXT: [[CST9:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
+; CHECK-NEXT: [[CUP10:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CUP]], <32 x i32> [[CST9]], i32 [[PTI1]])
+; CHECK-NEXT: [[CST13:%.*]] = bitcast <32 x i32> [[CUP10]] to <64 x i16>
+; CHECK-NEXT: [[V8:%.*]] = add <64 x i16> [[CST12]], [[CST13]]
; CHECK-NEXT: ret <64 x i16> [[V8]]
;
b0:
@@ -90,18 +94,20 @@ define <64 x i16> @f2(ptr %a0, i32 %a1) #0 {
; CHECK-NEXT: [[ADD:%.*]] = and i32 [[PTI]], -128
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[ADD]] to ptr
; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr [[V1]] to i32
-; CHECK-NEXT: [[ALD13:%.*]] = load <32 x i32>, ptr [[ITP]], align 128, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[ALD14:%.*]] = load <32 x i32>, ptr [[ITP]], align 128, !tbaa [[TBAA0]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[ITP]], i32 128
; CHECK-NEXT: [[ALD2:%.*]] = load <128 x i8>, ptr [[GEP]], align 128
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
-; CHECK-NEXT: [[ALD414:%.*]] = load <32 x i32>, ptr [[GEP3]], align 128, !tbaa [[TBAA3:![0-9]+]]
-; CHECK-NEXT: [[CST:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
-; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST]], <32 x i32> [[ALD13]], i32 [[PTI1]])
-; CHECK-NEXT: [[CST11:%.*]] = bitcast <32 x i32> [[CUP]] to <64 x i16>
-; CHECK-NEXT: [[CST8:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
-; CHECK-NEXT: [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[ALD414]], <32 x i32> [[CST8]], i32 [[PTI1]])
-; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP9]] to <64 x i16>
-; CHECK-NEXT: [[V8:%.*]] = add <64 x i16> [[CST11]], [[CST12]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
+; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP3]], i32 0), !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT: [[CST4:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
+; CHECK-NEXT: [[CUP6:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST4]], <32 x i32> [[ALD14]], i32 [[PTI1]])
+; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP6]] to <64 x i16>
+; CHECK-NEXT: [[CST9:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
+; CHECK-NEXT: [[CUP10:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CUP]], <32 x i32> [[CST9]], i32 [[PTI1]])
+; CHECK-NEXT: [[CST13:%.*]] = bitcast <32 x i32> [[CUP10]] to <64 x i16>
+; CHECK-NEXT: [[V8:%.*]] = add <64 x i16> [[CST12]], [[CST13]]
; CHECK-NEXT: ret <64 x i16> [[V8]]
;
b0:
@@ -148,8 +154,14 @@ define void @f3(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
; CHECK-NEXT: [[TRN18:%.*]] = trunc <128 x i8> [[CST12]] to <128 x i1>
; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST10]], ptr [[GEP]], i32 128, <128 x i1> [[TRN18]]), !tbaa [[TBAA5]]
; CHECK-NEXT: [[GEP19:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
+; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-NEXT: [[TRN20:%.*]] = trunc <128 x i8> [[CST17]] to <128 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST15]], ptr [[GEP19]], i32 128, <128 x i1> [[TRN20]]), !tbaa [[TBAA5]]
+; CHECK-NEXT: [[CUP21:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0), !tbaa [[TBAA5]]
+; CHECK-NEXT: [[CST22:%.*]] = bitcast <32 x i32> [[CUP21]] to <128 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = select <128 x i1> [[TRN20]], <128 x i8> [[CST15]], <128 x i8> [[CST22]]
+; CHECK-NEXT: [[CST23:%.*]] = bitcast <128 x i8> [[TMP1]] to <32 x i32>
+; CHECK-NEXT: call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0, <32 x i32> [[CST23]]), !tbaa [[TBAA5]]
; CHECK-NEXT: ret void
;
b0:
@@ -195,8 +207,14 @@ define void @f4(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
; CHECK-NEXT: [[TRN18:%.*]] = trunc <128 x i8> [[CST12]] to <128 x i1>
; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST10]], ptr [[GEP]], i32 128, <128 x i1> [[TRN18]])
; CHECK-NEXT: [[GEP19:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
+; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-NEXT: [[TRN20:%.*]] = trunc <128 x i8> [[CST17]] to <128 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST15]], ptr [[GEP19]], i32 128, <128 x i1> [[TRN20]]), !tbaa [[TBAA5]]
+; CHECK-NEXT: [[CUP21:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0), !tbaa [[TBAA5]]
+; CHECK-NEXT: [[CST22:%.*]] = bitcast <32 x i32> [[CUP21]] to <128 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = select <128 x i1> [[TRN20]], <128 x i8> [[CST15]], <128 x i8> [[CST22]]
+; CHECK-NEXT: [[CST23:%.*]] = bitcast <128 x i8> [[TMP1]] to <32 x i32>
+; CHECK-NEXT: call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0, <32 x i32> [[CST23]]), !tbaa [[TBAA5]]
; CHECK-NEXT: ret void
;
b0:
@@ -242,8 +260,14 @@ define void @f5(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
; CHECK-NEXT: [[TRN18:%.*]] = trunc <128 x i8> [[CST12]] to <128 x i1>
; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST10]], ptr [[GEP]], i32 128, <128 x i1> [[TRN18]])
; CHECK-NEXT: [[GEP19:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
+; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-NEXT: [[TRN20:%.*]] = trunc <128 x i8> [[CST17]] to <128 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST15]], ptr [[GEP19]], i32 128, <128 x i1> [[TRN20]]), !tbaa [[TBAA7:![0-9]+]]
+; CHECK-NEXT: [[CUP21:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0), !tbaa [[TBAA7:![0-9]+]]
+; CHECK-NEXT: [[CST22:%.*]] = bitcast <32 x i32> [[CUP21]] to <128 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = select <128 x i1> [[TRN20]], <128 x i8> [[CST15]], <128 x i8> [[CST22]]
+; CHECK-NEXT: [[CST23:%.*]] = bitcast <128 x i8> [[TMP1]] to <32 x i32>
+; CHECK-NEXT: call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0, <32 x i32> [[CST23]]), !tbaa [[TBAA7]]
; CHECK-NEXT: ret void
;
b0:
@@ -256,7 +280,7 @@ b0:
ret void
}
-attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" }
+attributes #0 = { nounwind "target-cpu"="hexagonv68" "target-features"="+hvxv68,+hvx-length128b" }
!0 = !{!1, !1, i64 0}
!1 = !{!"load type 1", !2}