[llvm] [GVN] Load-store forwarding of scalable store to fixed load. (PR #124748)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 07:13:22 PST 2025
https://github.com/iamlouk updated https://github.com/llvm/llvm-project/pull/124748
From 3ce8db3c3f5469c23d730d3b176db5ade1d6134c Mon Sep 17 00:00:00 2001
From: Lou Knauer <lou.knauer at sipearl.com>
Date: Tue, 28 Jan 2025 16:36:03 +0100
Subject: [PATCH 1/2] [GVN] Tests for load-store forwarding of scalable store to
fixed load
---
llvm/test/Transforms/GVN/vscale.ll | 98 ++++++++++++++++++++++++++++++
1 file changed, 98 insertions(+)
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
index 67cbfc2f05ef84..45ca5cb245518e 100644
--- a/llvm/test/Transforms/GVN/vscale.ll
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -641,3 +641,101 @@ entry:
call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull %ref.tmp)
ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %15
}
+
+define <vscale x 4 x float> @scalable_store_to_fixed_load(<vscale x 4 x float> %.coerce) #1 {
+; CHECK-LABEL: @scalable_store_to_fixed_load(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
+; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
+; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
+; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
+;
+entry:
+ %retval = alloca { <16 x float> }
+ %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
+ store <vscale x 4 x float> %0, ptr %retval
+ %1 = load <16 x float>, ptr %retval
+ %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
+ ret <vscale x 4 x float> %cast.scalable
+}
+
+define <vscale x 4 x float> @scalable_store_to_fixed_load_with_offset(<vscale x 4 x float> %a) #1 {
+; CHECK-LABEL: @scalable_store_to_fixed_load_with_offset(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR:%.*]] = alloca { <32 x float> }, align 128
+; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[GEP]], align 64
+; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
+; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
+;
+entry:
+ %ptr = alloca { <32 x float> }
+ store <vscale x 4 x float> %a, ptr %ptr
+ %gep = getelementptr inbounds i8, ptr %ptr, i64 8
+ %1 = load <16 x float>, ptr %gep
+ %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
+ ret <vscale x 4 x float> %cast.scalable
+}
+
+define <vscale x 4 x float> @scalable_store_to_fixed_load_unknown_vscale(<vscale x 4 x float> %.coerce) {
+; CHECK-LABEL: @scalable_store_to_fixed_load_unknown_vscale(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
+; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
+; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
+; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
+;
+entry:
+ %retval = alloca { <16 x float> }
+ %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
+ store <vscale x 4 x float> %0, ptr %retval
+ %1 = load <16 x float>, ptr %retval
+ %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
+ ret <vscale x 4 x float> %cast.scalable
+}
+
+define <vscale x 4 x float> @scalable_store_to_fixed_load_size_missmatch(<vscale x 4 x float> %.coerce) #1 {
+; CHECK-LABEL: @scalable_store_to_fixed_load_size_missmatch(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <32 x float> }, align 128
+; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, ptr [[RETVAL]], align 128
+; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> [[TMP1]], i64 0)
+; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
+;
+entry:
+ %retval = alloca { <32 x float> }
+ %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
+ store <vscale x 4 x float> %0, ptr %retval
+ %1 = load <32 x float>, ptr %retval
+ %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> %1, i64 0)
+ ret <vscale x 4 x float> %cast.scalable
+}
+
+; This function does not have a fixed vscale, but the loaded vector is still known
+; to be smaller than or equal in size to the stored vector.
+define <4 x float> @scalable_store_to_small_fixed_load(<vscale x 4 x float> %a) {
+; CHECK-LABEL: @scalable_store_to_small_fixed_load(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR:%.*]] = alloca <vscale x 4 x float>, align 16
+; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[PTR]], align 16
+; CHECK-NEXT: ret <4 x float> [[TMP0]]
+;
+entry:
+ %ptr = alloca <vscale x 4 x float>
+ store <vscale x 4 x float> %a, ptr %ptr
+ %1 = load <4 x float>, ptr %ptr
+ ret <4 x float> %1
+}
+
+declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float>, <16 x float>, i64 immarg)
+declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float>, <32 x float>, i64 immarg)
+
+attributes #1 = { vscale_range(4,4) }
From e39959470dcd003fbf232b9e6b2beec45d659b82 Mon Sep 17 00:00:00 2001
From: Lou Knauer <lou.knauer at sipearl.com>
Date: Tue, 28 Jan 2025 16:38:38 +0100
Subject: [PATCH 2/2] [GVN] Load-store forwarding of scalable store to fixed
load.
When storing a scalable vector and the vscale is a compile-time known
constant, allow store-to-load forwarding through temporary
@llvm.vector.extract calls, even if the loaded vector is fixed-sized
instead of scalable. InstCombine then folds the insert/extract pair
away.
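As a rough sketch of the transformation (function and value names below
are illustrative, not taken from the patch): with vscale_range(4,4) the
stored <vscale x 4 x float> is known to be 512 bits, so a <16 x float>
load from the same address can be forwarded as an extract at offset 0:

```llvm
; Before GVN: the <16 x float> load reads the scalable store back through memory.
define <16 x float> @before(<vscale x 4 x float> %val, ptr %buf) vscale_range(4,4) {
  store <vscale x 4 x float> %val, ptr %buf
  %fixed = load <16 x float>, ptr %buf
  ret <16 x float> %fixed
}

; After GVN: the load is forwarded as an extract of the stored value at offset 0.
define <16 x float> @after(<vscale x 4 x float> %val, ptr %buf) vscale_range(4,4) {
  store <vscale x 4 x float> %val, ptr %buf
  %fixed = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> %val, i64 0)
  ret <16 x float> %fixed
}

declare <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float>, i64 immarg)
```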
The use case is shown in this [godbolt
link](https://godbolt.org/z/KT3sMrMbd), which demonstrates that clang
generates IR matching this pattern when the "arm_sve_vector_bits"
attribute is used:
```c
#include <arm_sve.h>

typedef svfloat32_t svfloat32_fixed_t
    __attribute__((arm_sve_vector_bits(512)));

struct svfloat32_wrapped_t {
  svfloat32_fixed_t v;
};

static inline svfloat32_wrapped_t
add(svfloat32_wrapped_t a, svfloat32_wrapped_t b) {
  return {svadd_f32_x(svptrue_b32(), a.v, b.v)};
}

svfloat32_wrapped_t
foo(svfloat32_wrapped_t a, svfloat32_wrapped_t b) {
  // The IR pattern this patch matches is generated for this return:
  return add(a, b);
}
```
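For reference, the IR clang emits for `foo` above reduces to roughly the
following pattern (a simplified sketch modelled on the new
scalable_store_to_fixed_load test; the predicated SVE add is shown as
the plain fadd it gets folded to):

```llvm
define <vscale x 4 x float> @foo(<vscale x 4 x float> %a, <vscale x 4 x float> %b) vscale_range(4,4) {
entry:
  %retval = alloca { <16 x float> }
  ; svadd_f32_x with an all-true predicate, folded to a plain fadd
  %add = fadd <vscale x 4 x float> %a, %b
  ; the fixed-size struct return goes through memory...
  store <vscale x 4 x float> %add, ptr %retval
  %fixed = load <16 x float>, ptr %retval
  ; ...and is converted back to a scalable vector
  %cast = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %fixed, i64 0)
  ret <vscale x 4 x float> %cast
}

declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float>, <16 x float>, i64 immarg)
```

With this patch, GVN forwards the stored %add to the load, and the
extract/insert pair folds away, so the function returns %add directly
(see the updated scalable_store_to_fixed_load test below).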
---
.../llvm/Transforms/Utils/VNCoercion.h | 7 +-
llvm/lib/Transforms/Scalar/GVN.cpp | 12 ++-
llvm/lib/Transforms/Utils/VNCoercion.cpp | 93 ++++++++++++++-----
llvm/test/Transforms/GVN/vscale.ll | 6 +-
4 files changed, 81 insertions(+), 37 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/VNCoercion.h b/llvm/include/llvm/Transforms/Utils/VNCoercion.h
index f1ea94bf60fcc6..ed4dbad50ee853 100644
--- a/llvm/include/llvm/Transforms/Utils/VNCoercion.h
+++ b/llvm/include/llvm/Transforms/Utils/VNCoercion.h
@@ -23,6 +23,7 @@
namespace llvm {
class Constant;
+class Function;
class StoreInst;
class LoadInst;
class MemIntrinsic;
@@ -35,7 +36,7 @@ namespace VNCoercion {
/// Return true if CoerceAvailableValueToLoadType would succeed if it was
/// called.
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
- const DataLayout &DL);
+ Function *F);
/// If we saw a store of a value to memory, and then a load from a must-aliased
/// pointer of a different type, try to coerce the stored value to the loaded
@@ -44,7 +45,7 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
///
/// If we can't do it, return null.
Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
- IRBuilderBase &IRB, const DataLayout &DL);
+ IRBuilderBase &IRB, Function *F);
/// This function determines whether a value for the pointer LoadPtr can be
/// extracted from the store at DepSI.
@@ -75,7 +76,7 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
/// It inserts instructions to do so at InsertPt, and returns the extracted
/// value.
Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
- Instruction *InsertPt, const DataLayout &DL);
+ Instruction *InsertPt, Function *F);
// This is the same as getValueForLoad, except it performs no insertion.
// It only allows constant inputs.
Constant *getConstantValueForLoad(Constant *SrcVal, unsigned Offset,
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 21eb7f741d7c82..9c9d8f328f7330 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1096,7 +1096,7 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- Res = getValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
+ Res = getValueForLoad(Res, Offset, LoadTy, InsertPt, Load->getFunction());
LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
<< " " << *getSimpleValue() << '\n'
@@ -1109,7 +1109,7 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
Res = CoercedLoad;
combineMetadataForCSE(CoercedLoad, Load, false);
} else {
- Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL);
+ Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, Load->getFunction());
// We are adding a new user for this load, for which the original
// metadata may not hold. Additionally, the new load may have a different
// size and type, so their metadata cannot be combined in any
@@ -1291,7 +1291,8 @@ GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
// If MD reported clobber, check it was nested.
if (DepInfo.isClobber() &&
- canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
+ canCoerceMustAliasedValueToLoad(DepLoad, LoadType,
+ DepLoad->getFunction())) {
const auto ClobberOff = MD->getClobberOffset(DepLoad);
// GVN has no deal with a negative offset.
Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
@@ -1343,7 +1344,7 @@ GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
// different types if we have to. If the stored value is convertable to
// the loaded value, we can reuse it.
if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), Load->getType(),
- DL))
+ S->getFunction()))
return std::nullopt;
// Can't forward from non-atomic to atomic without violating memory model.
@@ -1357,7 +1358,8 @@ GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
// If the types mismatch and we can't handle it, reject reuse of the load.
// If the stored value is larger or equal to the loaded value, we can reuse
// it.
- if (!canCoerceMustAliasedValueToLoad(LD, Load->getType(), DL))
+ if (!canCoerceMustAliasedValueToLoad(LD, Load->getType(),
+ LD->getFunction()))
return std::nullopt;
// Can't forward from non-atomic to atomic without violating memory model.
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 7a61ab74166389..b4b7379212ada5 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -13,32 +13,52 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty);
}
+static std::optional<unsigned> getKnownVScale(Function *F) {
+ const auto &Attrs = F->getAttributes().getFnAttrs();
+ unsigned MinVScale = Attrs.getVScaleRangeMin();
+ if (Attrs.getVScaleRangeMax() == MinVScale)
+ return MinVScale;
+ return std::nullopt;
+}
+
/// Return true if coerceAvailableValueToLoadType will succeed.
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
- const DataLayout &DL) {
+ Function *F) {
Type *StoredTy = StoredVal->getType();
-
if (StoredTy == LoadTy)
return true;
+ const DataLayout &DL = F->getDataLayout();
+ TypeSize StoreSize = DL.getTypeSizeInBits(StoredTy);
+ TypeSize LoadSize = DL.getTypeSizeInBits(LoadTy);
if (isa<ScalableVectorType>(StoredTy) && isa<ScalableVectorType>(LoadTy) &&
- DL.getTypeSizeInBits(StoredTy) == DL.getTypeSizeInBits(LoadTy))
+ StoreSize == LoadSize)
return true;
- // If the loaded/stored value is a first class array/struct, or scalable type,
- // don't try to transform them. We need to be able to bitcast to integer.
- if (isFirstClassAggregateOrScalableType(LoadTy) ||
- isFirstClassAggregateOrScalableType(StoredTy))
+ // If the loaded/stored value is a first class array/struct, don't try to
+ // transform them. We need to be able to bitcast to integer. For scalable
+ // vectors forwarded to fixed-sized vectors @llvm.vector.extract is used.
+ if (isa<ScalableVectorType>(StoredTy) && isa<FixedVectorType>(LoadTy)) {
+ if (StoredTy->getScalarType() != LoadTy->getScalarType())
+ return false;
+
+ // If the VScale is known at compile-time, use that information to
+ // allow for wider loads.
+ std::optional<unsigned> VScale = getKnownVScale(F);
+ if (VScale)
+ StoreSize =
+ TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale.value());
+ } else if (isFirstClassAggregateOrScalableType(LoadTy) ||
+ isFirstClassAggregateOrScalableType(StoredTy)) {
return false;
-
- uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedValue();
+ }
// The store size must be byte-aligned to support future type casts.
if (llvm::alignTo(StoreSize, 8) != StoreSize)
return false;
// The store has to be at least as big as the load.
- if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedValue())
+ if (!TypeSize::isKnownGE(StoreSize, LoadSize))
return false;
bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
@@ -57,11 +77,10 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
return false;
}
-
// The implementation below uses inttoptr for vectors of unequal size; we
// can't allow this for non integral pointers. We could teach it to extract
// exact subvectors if desired.
- if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedValue())
+ if (StoredNI && StoreSize != LoadSize)
return false;
if (StoredTy->isTargetExtTy() || LoadTy->isTargetExtTy())
@@ -78,15 +97,24 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
/// If we can't do it, return null.
Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
IRBuilderBase &Helper,
- const DataLayout &DL) {
- assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
+ Function *F) {
+ assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, F) &&
"precondition violation - materialization can't fail");
+ const DataLayout &DL = F->getDataLayout();
if (auto *C = dyn_cast<Constant>(StoredVal))
StoredVal = ConstantFoldConstant(C, DL);
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
+ // If this is a scalable vector forwarded to a fixed vector load, create
+ // a @llvm.vector.extract instead of bitcasts.
+ if (isa<ScalableVectorType>(StoredVal->getType()) &&
+ isa<FixedVectorType>(LoadedTy)) {
+ return Helper.CreateIntrinsic(LoadedTy, Intrinsic::vector_extract,
+ {StoredVal, Helper.getInt64(0)});
+ }
+
TypeSize StoredValSize = DL.getTypeSizeInBits(StoredValTy);
TypeSize LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
@@ -220,7 +248,7 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
return -1;
- if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
+ if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DepSI->getFunction()))
return -1;
Value *StorePtr = DepSI->getPointerOperand();
@@ -235,11 +263,11 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
/// the other load can feed into the second load.
int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
const DataLayout &DL) {
- // Cannot handle reading from store of first-class aggregate yet.
- if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+ // Cannot handle reading from store of first-class aggregate or scalable type.
+ if (isFirstClassAggregateOrScalableType(DepLI->getType()))
return -1;
- if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
+ if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DepLI->getFunction()))
return -1;
Value *DepPtr = DepLI->getPointerOperand();
@@ -315,6 +343,16 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
return SrcVal;
}
+ // For the case of a scalable vector being forwarded to a fixed-sized load,
+ // only equal element types are allowed and a @llvm.vector.extract will be
+ // used instead of bitcasts.
+ if (isa<ScalableVectorType>(SrcVal->getType()) &&
+ isa<FixedVectorType>(LoadTy)) {
+ assert(Offset == 0 &&
+ SrcVal->getType()->getScalarType() == LoadTy->getScalarType());
+ return SrcVal;
+ }
+
uint64_t StoreSize =
(DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;
uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;
@@ -344,20 +382,24 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
}
Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
- Instruction *InsertPt, const DataLayout &DL) {
+ Instruction *InsertPt, Function *F) {
+ const DataLayout &DL = F->getDataLayout();
#ifndef NDEBUG
TypeSize SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
TypeSize LoadSize = DL.getTypeStoreSize(LoadTy);
- assert(SrcValSize.isScalable() == LoadSize.isScalable());
+ if (auto VScale = getKnownVScale(InsertPt->getFunction());
+ VScale && SrcValSize.isScalable() && !LoadSize.isScalable())
+ SrcValSize =
+ TypeSize::getFixed(SrcValSize.getKnownMinValue() * VScale.value());
assert((SrcValSize.isScalable() || Offset + LoadSize <= SrcValSize) &&
"Expected Offset + LoadSize <= SrcValSize");
- assert(
- (!SrcValSize.isScalable() || (Offset == 0 && LoadSize == SrcValSize)) &&
- "Expected scalable type sizes to match");
+ assert((!SrcValSize.isScalable() ||
+ (Offset == 0 && TypeSize::isKnownLE(LoadSize, SrcValSize))) &&
+ "Expected offset of zero and LoadSize <= SrcValSize");
#endif
IRBuilder<> Builder(InsertPt);
SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
- return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
+ return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, F);
}
Constant *getConstantValueForLoad(Constant *SrcVal, unsigned Offset,
@@ -408,7 +450,8 @@ Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
++NumBytesSet;
}
- return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
+ return coerceAvailableValueToLoadType(
+ Val, LoadTy, Builder, InsertPt->getFunction());
}
// Otherwise, this is a memcpy/memmove from a constant global.
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
index 45ca5cb245518e..084ce6a6d66ff2 100644
--- a/llvm/test/Transforms/GVN/vscale.ll
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -648,9 +648,7 @@ define <vscale x 4 x float> @scalable_store_to_fixed_load(<vscale x 4 x float> %
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
-; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
-; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
;
entry:
%retval = alloca { <16 x float> }
@@ -725,7 +723,7 @@ define <4 x float> @scalable_store_to_small_fixed_load(<vscale x 4 x float> %a)
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR:%.*]] = alloca <vscale x 4 x float>, align 16
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
-; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[PTR]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> [[A]], i64 0)
; CHECK-NEXT: ret <4 x float> [[TMP0]]
;
entry: