[llvm] ad35d91 - [VectorCombine] Enable transform 'foldSingleElementStore' for scalable vector types
Ben Shi via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 23 02:13:20 PDT 2023
Author: Ben Shi
Date: 2023-08-23T17:12:36+08:00
New Revision: ad35d916cd34154fd88757374d26491611f60cff
URL: https://github.com/llvm/llvm-project/commit/ad35d916cd34154fd88757374d26491611f60cff
DIFF: https://github.com/llvm/llvm-project/commit/ad35d916cd34154fd88757374d26491611f60cff.diff
LOG: [VectorCombine] Enable transform 'foldSingleElementStore' for scalable vector types
The transform 'foldSingleElementStore' can also be applied to scalable
vector types, as long as the index is known to be less than the minimum
number of elements of the vector type.
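For example (the insert_store_vscale case from the updated test below), a
constant index of 3 is known to be in bounds because <vscale x 8 x i16> has
at least 8 elements, so the load/insertelement/store sequence collapses to a
single scalar store:

  Before:
    %0 = load <vscale x 8 x i16>, ptr %q, align 16
    %vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 3
    store <vscale x 8 x i16> %vecins, ptr %q, align 16

  After:
    %0 = getelementptr inbounds <vscale x 8 x i16>, ptr %q, i32 0, i32 3
    store i16 %s, ptr %0, align 2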
Reviewed By: dmgreen, nikic
Differential Revision: https://reviews.llvm.org/D157676
Added:
Modified:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/load-insert-store.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 13464c9d3496e0..66e3bcaac0adb2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1013,19 +1013,24 @@ class ScalarizationResult {
/// Check if it is legal to scalarize a memory access to \p VecTy at index \p
/// Idx. \p Idx must access a valid vector element.
-static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
- Value *Idx, Instruction *CtxI,
+static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx,
+ Instruction *CtxI,
AssumptionCache &AC,
const DominatorTree &DT) {
+ // We do checks for both fixed vector types and scalable vector types.
+ // This is the number of elements of fixed vector types,
+ // or the minimum number of elements of scalable vector types.
+ uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
+
if (auto *C = dyn_cast<ConstantInt>(Idx)) {
- if (C->getValue().ult(VecTy->getNumElements()))
+ if (C->getValue().ult(NumElements))
return ScalarizationResult::safe();
return ScalarizationResult::unsafe();
}
unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
APInt Zero(IntWidth, 0);
- APInt MaxElts(IntWidth, VecTy->getNumElements());
+ APInt MaxElts(IntWidth, NumElements);
ConstantRange ValidIndices(Zero, MaxElts);
ConstantRange IdxRange(IntWidth, true);
@@ -1074,8 +1079,7 @@ static Align computeAlignmentAfterScalarization(Align VectorAlignment,
// store i32 %b, i32* %1
bool VectorCombine::foldSingleElementStore(Instruction &I) {
auto *SI = cast<StoreInst>(&I);
- if (!SI->isSimple() ||
- !isa<FixedVectorType>(SI->getValueOperand()->getType()))
+ if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
return false;
// TODO: Combine more complicated patterns (multiple insert) by referencing
@@ -1089,7 +1093,7 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
return false;
if (auto *Load = dyn_cast<LoadInst>(Source)) {
- auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
+ auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
const DataLayout &DL = I.getModule()->getDataLayout();
Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
// Don't optimize for atomic/volatile load or store. Ensure memory is not
diff --git a/llvm/test/Transforms/VectorCombine/load-insert-store.ll b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
index 8d847af8d006d2..3b092fedfdfa21 100644
--- a/llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=vector-combine -data-layout=e < %s | FileCheck %s --check-prefixes=CHECK,LE
-; RUN: opt -S -passes=vector-combine -data-layout=E < %s | FileCheck %s --check-prefixes=CHECK,BE
+; RUN: opt -S -passes=vector-combine -data-layout=e < %s | FileCheck %s
+; RUN: opt -S -passes=vector-combine -data-layout=E < %s | FileCheck %s
define void @insert_store(ptr %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store(
@@ -49,9 +49,8 @@ entry:
define void @insert_store_vscale(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_vscale(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i16>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
-; CHECK-NEXT: store <vscale x 8 x i16> [[VECINS]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <vscale x 8 x i16>, ptr [[Q:%.*]], i32 0, i32 3
+; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -251,9 +250,8 @@ define void @insert_store_vscale_nonconst_index_known_valid_by_assume(ptr %q, i8
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
-; CHECK-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <vscale x 16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX]]
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
@@ -351,10 +349,9 @@ entry:
define void @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_and(ptr %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_and(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
-; CHECK-NEXT: [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <vscale x 16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
@@ -493,10 +490,9 @@ entry:
define void @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_urem(ptr %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_urem(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
-; CHECK-NEXT: [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <vscale x 16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
@@ -818,6 +814,3 @@ bb:
declare i32 @bar(i32, i1) readonly
declare double @llvm.log2.f64(double)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; BE: {{.*}}
-; LE: {{.*}}