[llvm] [LV] Teach LoopVectorizationLegality about struct vector calls (PR #119221)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 18 07:24:18 PST 2024
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/119221
>From d5b8cfcf23493ac4a545bfe9db0791dffd0eb4c2 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 9 Dec 2024 15:05:00 +0000
Subject: [PATCH 1/3] [LV] Teach LoopVectorizationLegality about struct vector
calls
This is a split-off from #109833 and only adds code relating to checking
if a struct-returning call can be vectorized.
This initial patch only allows the case where all users of the struct
return are `extractvalue` operations that can be widened.
```
%call = tail call { float, float } @foo(float %in_val) #0
%extract_a = extractvalue { float, float } %call, 0
%extract_b = extractvalue { float, float } %call, 1
```
Note: The tests require the VFABI changes from #119000 to pass.
---
llvm/include/llvm/IR/VectorTypeUtils.h | 16 ++
.../Vectorize/LoopVectorizationLegality.h | 10 +
llvm/lib/IR/VectorTypeUtils.cpp | 9 +
.../Vectorize/LoopVectorizationLegality.cpp | 29 +-
.../Transforms/Vectorize/LoopVectorize.cpp | 9 +
.../AArch64/scalable-struct-return.ll | 97 +++++++
.../Transforms/LoopVectorize/struct-return.ll | 268 ++++++++++++++++++
7 files changed, 436 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll
create mode 100644 llvm/test/Transforms/LoopVectorize/struct-return.ll
diff --git a/llvm/include/llvm/IR/VectorTypeUtils.h b/llvm/include/llvm/IR/VectorTypeUtils.h
index f30bf9ee9240b0..d4236b193bc5b1 100644
--- a/llvm/include/llvm/IR/VectorTypeUtils.h
+++ b/llvm/include/llvm/IR/VectorTypeUtils.h
@@ -40,6 +40,10 @@ Type *toScalarizedStructTy(StructType *StructTy);
/// are vectors of matching element count. This does not include empty structs.
bool isVectorizedStructTy(StructType *StructTy);
+/// Returns true if `StructTy` is an unpacked literal struct where all elements
+/// are scalars that can be used as vector element types.
+bool canVectorizeStructTy(StructType *StructTy);
+
/// A helper for converting to vectorized types. For scalar types, this is
/// equivalent to calling `ToVectorTy`. For struct types, this returns a new
/// struct where each element type has been widened to a vector type.
@@ -71,6 +75,18 @@ inline bool isVectorizedTy(Type *Ty) {
return Ty->isVectorTy();
}
+// Returns true if `Ty` is a valid vector element type, void, or an unpacked
+// literal struct where all elements are valid vector element types.
+// Note: Even if a type can be vectorized that does not mean it is valid to do
+// so in all cases. For example, a vectorized struct (as returned by
+// toVectorizedTy) does not perform (de)interleaving, so it can't be used for
+// vectorizing loads/stores.
+inline bool canVectorizeTy(Type *Ty) {
+ if (StructType *StructTy = dyn_cast<StructType>(Ty))
+ return canVectorizeStructTy(StructTy);
+ return Ty->isVoidTy() || VectorType::isValidElementType(Ty);
+}
+
/// Returns the types contained in `Ty`. For struct types, it returns the
/// elements, all other types are returned directly.
inline ArrayRef<Type *> getContainedTypes(Type *const &Ty) {
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index fbe80eddbae07a..cd64b13a034b79 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -422,6 +422,10 @@ class LoopVectorizationLegality {
/// has a vectorized variant available.
bool hasVectorCallVariants() const { return VecCallVariantsFound; }
+ /// Returns true if there is at least one function call in the loop which
+ /// returns a struct type and needs to be vectorized.
+ bool hasStructVectorCall() const { return StructVecVecCallFound; }
+
unsigned getNumStores() const { return LAI->getNumStores(); }
unsigned getNumLoads() const { return LAI->getNumLoads(); }
@@ -644,6 +648,12 @@ class LoopVectorizationLegality {
/// the use of those function variants.
bool VecCallVariantsFound = false;
+ /// If we find a call (to be vectorized) that returns a struct type, record
+ /// that so we can bail out until this is supported.
+ /// TODO: Remove this flag once vectorizing calls with struct returns is
+ /// supported.
+ bool StructVecVecCallFound = false;
+
/// Indicates whether this loop has an uncountable early exit, i.e. an
/// uncountable exiting block that is not the latch.
bool HasUncountableEarlyExit = false;
diff --git a/llvm/lib/IR/VectorTypeUtils.cpp b/llvm/lib/IR/VectorTypeUtils.cpp
index e6e265414a2b8e..2f05e3b8619fb4 100644
--- a/llvm/lib/IR/VectorTypeUtils.cpp
+++ b/llvm/lib/IR/VectorTypeUtils.cpp
@@ -52,3 +52,12 @@ bool llvm::isVectorizedStructTy(StructType *StructTy) {
return Ty->isVectorTy() && cast<VectorType>(Ty)->getElementCount() == VF;
});
}
+
+/// Returns true if `StructTy` is an unpacked literal struct where all elements
+/// are scalars that can be used as vector element types.
+bool llvm::canVectorizeStructTy(StructType *StructTy) {
+ auto ElemTys = StructTy->elements();
+ if (ElemTys.empty() || !isUnpackedStructLiteral(StructTy))
+ return false;
+ return all_of(ElemTys, VectorType::isValidElementType);
+}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 555c8435dd330d..c68e95a42be579 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -779,6 +779,18 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
return Scalarize;
}
+/// Returns true if the call return type `Ty` can be widened by the loop
+/// vectorizer.
+static bool canWidenCallReturnType(Type *Ty) {
+ auto *StructTy = dyn_cast<StructType>(Ty);
+ // TODO: Remove the homogeneous types restriction. This is just an initial
+ // simplification. When we want to support things like the overflow intrinsics
+ // we will have to lift this restriction.
+ if (StructTy && !StructTy->containsHomogeneousTypes())
+ return false;
+ return canVectorizeTy(StructTy);
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *Header = TheLoop->getHeader();
@@ -943,11 +955,24 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI && !VFDatabase::getMappings(*CI).empty())
VecCallVariantsFound = true;
+ auto canWidenInstruction = [this](Instruction const &Inst) {
+ Type *InstTy = Inst.getType();
+ if (isa<CallInst>(Inst) && isa<StructType>(InstTy) &&
+ canWidenCallReturnType(InstTy)) {
+ StructVecVecCallFound = true;
+ // For now, we can only widen struct values returned from calls where
+ // all users are extractvalue instructions.
+ return llvm::all_of(Inst.uses(), [](auto &Use) {
+ return isa<ExtractValueInst>(Use.getUser());
+ });
+ }
+ return VectorType::isValidElementType(InstTy) || InstTy->isVoidTy();
+ };
+
// Check that the instruction return type is vectorizable.
// We can't vectorize casts from vector type to scalar type.
// Also, we can't vectorize extractelement instructions.
- if ((!VectorType::isValidElementType(I.getType()) &&
- !I.getType()->isVoidTy()) ||
+ if (!canWidenInstruction(I) ||
(isa<CastInst>(I) &&
!VectorType::isValidElementType(I.getOperand(0)->getType())) ||
isa<ExtractElementInst>(I)) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a6acc710a34c89..c3e607eb5cf90f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10258,6 +10258,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
}
}
+ if (LVL.hasStructVectorCall()) {
+ constexpr StringLiteral FailureMessage(
+ "Auto-vectorization of calls that return struct types is not yet "
+ "supported");
+ reportVectorizationFailure(FailureMessage, FailureMessage,
+ "StructCallVectorizationUnsupported", ORE, L);
+ return false;
+ }
+
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
// even evaluating whether vectorization is profitable. Since we cannot modify
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll
new file mode 100644
index 00000000000000..0454272d3f3dd6
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -mattr=+sve -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
+; RUN: opt < %s -mattr=+sve -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -pass-remarks-analysis=loop-vectorize -disable-output -S 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Tests basic vectorization of scalable homogeneous struct literal returns.
+
+; TODO: Support vectorization in this case.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
+define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @struct_return_f32_widen
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { float, float } @foo(float %in_val) #0
+ %extract_a = extractvalue { float, float } %call, 0
+ %extract_b = extractvalue { float, float } %call, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
+ store float %extract_b, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; TODO: Support vectorization in this case.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
+define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @struct_return_f64_widen
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
+ %in_val = load double, ptr %arrayidx, align 8
+ %call = tail call { double, double } @bar(double %in_val) #1
+ %extract_a = extractvalue { double, double } %call, 0
+ %extract_b = extractvalue { double, double } %call, 1
+ %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv
+ store double %extract_a, ptr %arrayidx2, align 8
+ %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv
+ store double %extract_b, ptr %arrayidx4, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; TODO: Support vectorization in this case.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
+define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, ptr writeonly %out_b) {
+; CHECK-LABEL: define void @struct_return_f32_widen_rt_checks
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { float, float } @foo(float %in_val) #0
+ %extract_a = extractvalue { float, float } %call, 0
+ %extract_b = extractvalue { float, float } %call, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
+ store float %extract_b, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+declare { float, float } @foo(float)
+declare { double, double } @bar(double)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>)
+declare { <vscale x 2 x double>, <vscale x 2 x double> } @scalable_vec_masked_bar(<vscale x 2 x double>, <vscale x 2 x i1>)
+
+
+attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
+attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_bar(scalable_vec_masked_bar)" }
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll
new file mode 100644
index 00000000000000..1ac0c1670b9dc3
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll
@@ -0,0 +1,268 @@
+; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-width=2 -force-vector-interleave=1 -pass-remarks-analysis=loop-vectorize -disable-output -S 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; Tests basic vectorization of homogeneous struct literal returns.
+
+; TODO: Support vectorization in this case.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
+define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @struct_return_f32_widen
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { float, float } @foo(float %in_val) #0
+ %extract_a = extractvalue { float, float } %call, 0
+ %extract_b = extractvalue { float, float } %call, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
+ store float %extract_b, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; TODO: Support vectorization in this case.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
+define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @struct_return_f64_widen
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
+ %in_val = load double, ptr %arrayidx, align 8
+ %call = tail call { double, double } @bar(double %in_val) #1
+ %extract_a = extractvalue { double, double } %call, 0
+ %extract_b = extractvalue { double, double } %call, 1
+ %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv
+ store double %extract_a, ptr %arrayidx2, align 8
+ %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv
+ store double %extract_b, ptr %arrayidx4, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; TODO: Support vectorization in this case.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
+define void @struct_return_f32_replicate(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @struct_return_f32_replicate
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load float, ptr %arrayidx, align 4
+ ; #3 does not have a fixed-size vector mapping (so replication is used)
+ %call = tail call { float, float } @foo(float %in_val) #3
+ %extract_a = extractvalue { float, float } %call, 0
+ %extract_b = extractvalue { float, float } %call, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
+ store float %extract_b, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; TODO: Support vectorization in this case.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
+define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, ptr writeonly %out_b) {
+; CHECK-LABEL: define void @struct_return_f32_widen_rt_checks
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { float, float } @foo(float %in_val) #0
+ %extract_a = extractvalue { float, float } %call, 0
+ %extract_b = extractvalue { float, float } %call, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
+ store float %extract_b, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; Negative test. Widening structs with mixed element types is not supported.
+; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
+define void @negative_mixed_element_type_struct_return(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @negative_mixed_element_type_struct_return
+; CHECK-NOT: vector.body:
+; CHECK-NOT: call {{.*}} @fixed_vec_baz
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { float, i32 } @baz(float %in_val) #2
+ %extract_a = extractvalue { float, i32 } %call, 0
+ %extract_b = extractvalue { float, i32 } %call, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32, ptr %out_b, i64 %iv
+ store i32 %extract_b, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+%named_struct = type { double, double }
+
+; Negative test. Widening non-literal structs is not supported.
+; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
+define void @test_named_struct_return(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @test_named_struct_return
+; CHECK-NOT: vector.body:
+; CHECK-NOT: call {{.*}} @fixed_vec_bar
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
+ %in_val = load double, ptr %arrayidx, align 8
+ %call = tail call %named_struct @bar_named(double %in_val) #4
+ %extract_a = extractvalue %named_struct %call, 0
+ %extract_b = extractvalue %named_struct %call, 1
+ %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv
+ store double %extract_a, ptr %arrayidx2, align 8
+ %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv
+ store double %extract_b, ptr %arrayidx4, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; TODO: Allow mixed-struct type vectorization and mark overflow intrinsics as trivially vectorizable.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: call instruction cannot be vectorized
+define void @test_overflow_intrinsic(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @test_overflow_intrinsic
+; CHECK-NOT: vector.body:
+; CHECK-NOT: @llvm.sadd.with.overflow.v{{.+}}i32
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load i32, ptr %arrayidx, align 4
+ %call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %in_val, i32 %in_val)
+ %extract_ret = extractvalue { i32, i1 } %call, 0
+ %extract_overflow = extractvalue { i32, i1 } %call, 1
+ %zext_overflow = zext i1 %extract_overflow to i8
+ %arrayidx2 = getelementptr inbounds i32, ptr %out_a, i64 %iv
+ store i32 %extract_ret, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i8, ptr %out_b, i64 %iv
+ store i8 %zext_overflow, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; Negative test. Widening struct loads is not supported.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
+define void @negative_struct_load(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @negative_struct_load
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds { float, float }, ptr %in, i64 %iv
+ %call = load { float, float }, ptr %arrayidx, align 8
+ %extract_a = extractvalue { float, float } %call, 0
+ %extract_b = extractvalue { float, float } %call, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
+ store float %extract_b, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; Negative test. Widening struct stores is not supported.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
+define void @negative_struct_return_store_struct(ptr noalias %in, ptr noalias writeonly %out) {
+; CHECK-LABEL: define void @negative_struct_return_store_struct
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds { float, float }, ptr %in, i64 %iv
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { float, float } @foo(float %in_val) #0
+ %out_ptr = getelementptr inbounds { float, float }, ptr %out, i64 %iv
+ store { float, float } %call, ptr %out_ptr, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+declare { float, float } @foo(float)
+declare { double, double } @bar(double)
+declare { float, i32 } @baz(float)
+declare %named_struct @bar_named(double)
+
+declare { <2 x float>, <2 x float> } @fixed_vec_foo(<2 x float>)
+declare { <2 x double>, <2 x double> } @fixed_vec_bar(<2 x double>)
+declare { <2 x float>, <2 x i32> } @fixed_vec_baz(<2 x float>)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>)
+
+attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_foo(fixed_vec_foo)" }
+attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_bar(fixed_vec_bar)" }
+attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_baz(fixed_vec_baz)" }
+attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
+attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_bar_named(fixed_vec_bar)" }
>From 129bdbd845ca4a6c64f11a64fbf5614878f437a7 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 18 Dec 2024 14:22:45 +0000
Subject: [PATCH 2/3] Rename test
---
llvm/test/Transforms/LoopVectorize/struct-return.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll
index 1ac0c1670b9dc3..0a06484c85e32c 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll
@@ -146,8 +146,8 @@ exit:
; Negative test. Widening non-literal structs is not supported.
; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
-define void @test_named_struct_return(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
-; CHECK-LABEL: define void @test_named_struct_return
+define void @negative_named_struct_return(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @negative_named_struct_return
; CHECK-NOT: vector.body:
; CHECK-NOT: call {{.*}} @fixed_vec_bar
entry:
>From 419e516d07f1962a0770572ae03eb85c181636b5 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 18 Dec 2024 15:23:28 +0000
Subject: [PATCH 3/3] Fixups
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +-
.../Transforms/LoopVectorize/struct-return.ll | 86 ++++++++++++++++---
2 files changed, 74 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c3e607eb5cf90f..b18f1d1472a0b0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10259,10 +10259,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
}
if (LVL.hasStructVectorCall()) {
- constexpr StringLiteral FailureMessage(
- "Auto-vectorization of calls that return struct types is not yet "
- "supported");
- reportVectorizationFailure(FailureMessage, FailureMessage,
+ reportVectorizationFailure("Auto-vectorization of calls that return struct "
+ "types is not yet supported",
"StructCallVectorizationUnsupported", ORE, L);
return false;
}
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll
index 0a06484c85e32c..78e05d55d1c9da 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll
@@ -114,6 +114,35 @@ exit:
ret void
}
+; TODO: Allow mixed-struct type vectorization and mark overflow intrinsics as trivially vectorizable.
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: call instruction cannot be vectorized
+define void @test_overflow_intrinsic(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @test_overflow_intrinsic
+; CHECK-NOT: vector.body:
+; CHECK-NOT: @llvm.sadd.with.overflow.v{{.+}}i32
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load i32, ptr %arrayidx, align 4
+ %call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %in_val, i32 %in_val)
+ %extract_ret = extractvalue { i32, i1 } %call, 0
+ %extract_overflow = extractvalue { i32, i1 } %call, 1
+ %zext_overflow = zext i1 %extract_overflow to i8
+ %arrayidx2 = getelementptr inbounds i32, ptr %out_a, i64 %iv
+ store i32 %extract_ret, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i8, ptr %out_b, i64 %iv
+ store i8 %zext_overflow, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
; Negative test. Widening structs with mixed element types is not supported.
; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_mixed_element_type_struct_return(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@@ -172,27 +201,54 @@ exit:
ret void
}
-; TODO: Allow mixed-struct type vectorization and mark overflow intrinsics as trivially vectorizable.
-; CHECK-REMARKS: remark: {{.*}} loop not vectorized: call instruction cannot be vectorized
-define void @test_overflow_intrinsic(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
-; CHECK-LABEL: define void @test_overflow_intrinsic
+; Negative test. Nested homogeneous structs are not supported.
+; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
+define void @negative_nested_struct(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @negative_nested_struct
; CHECK-NOT: vector.body:
-; CHECK-NOT: @llvm.sadd.with.overflow.v{{.+}}i32
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
- %in_val = load i32, ptr %arrayidx, align 4
- %call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %in_val, i32 %in_val)
- %extract_ret = extractvalue { i32, i1 } %call, 0
- %extract_overflow = extractvalue { i32, i1 } %call, 1
- %zext_overflow = zext i1 %extract_overflow to i8
- %arrayidx2 = getelementptr inbounds i32, ptr %out_a, i64 %iv
- store i32 %extract_ret, ptr %arrayidx2, align 4
- %arrayidx4 = getelementptr inbounds i8, ptr %out_b, i64 %iv
- store i8 %zext_overflow, ptr %arrayidx4, align 4
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { { float, float } } @foo_nested_struct(float %in_val) #0
+ %extract_inner = extractvalue { { float, float } } %call, 0
+ %extract_a = extractvalue { float, float } %extract_inner, 0
+ %extract_b = extractvalue { float, float } %extract_inner, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
+ store float %extract_b, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; Negative test. Homogeneous structs of arrays are not supported.
+; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
+define void @negative_struct_array_elements(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; CHECK-LABEL: define void @negative_struct_array_elements
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { [2 x float] } @foo_arrays(float %in_val) #0
+ %extract_inner = extractvalue { [2 x float] } %call, 0
+ %extract_a = extractvalue [2 x float] %extract_inner, 0
+ %extract_b = extractvalue [2 x float] %extract_inner, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
+ store float %extract_b, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %exit, label %for.body
@@ -254,6 +310,8 @@ declare { float, float } @foo(float)
declare { double, double } @bar(double)
declare { float, i32 } @baz(float)
declare %named_struct @bar_named(double)
+declare { { float, float } } @foo_nested_struct(float)
+declare { [2 x float] } @foo_arrays(float)
declare { <2 x float>, <2 x float> } @fixed_vec_foo(<2 x float>)
declare { <2 x double>, <2 x double> } @fixed_vec_bar(<2 x double>)
More information about the llvm-commits
mailing list