[llvm] [SandboxVec] Optimization remarks (PR #129582)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 08:17:35 PDT 2025
https://github.com/vporpo updated https://github.com/llvm/llvm-project/pull/129582
>From ca19fca457da9cfcec807c48701071d6e2aac2bc Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vporpodas at google.com>
Date: Wed, 12 Mar 2025 16:47:54 -0700
Subject: [PATCH 1/2] [SandboxVec][StructInitVec] Initial pass implementation
---
.../SandboxVectorizer/Passes/StructInitVec.h | 50 +++
.../Vectorize/SandboxVectorizer/VecUtils.h | 38 ++-
llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 +
.../SandboxVectorizer/Passes/PassRegistry.def | 1 +
.../Passes/StructInitVec.cpp | 143 +++++++++
.../SandboxVectorizerPassBuilder.cpp | 1 +
.../SandboxVectorizer/struct_init_vec.ll | 289 ++++++++++++++++++
.../SandboxVectorizer/VecUtilsTest.cpp | 75 +++++
8 files changed, 596 insertions(+), 2 deletions(-)
create mode 100644 llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h
create mode 100644 llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
create mode 100644 llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h
new file mode 100644
index 0000000000000..3527807867846
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h
@@ -0,0 +1,50 @@
+//===- StructInitVec.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that vectorizes struct initializations.
+// Generic bottom-up vectorization cannot handle these because the
+// initialization instructions can be of different types.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_STRUCTINITVEC_H
+#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_STRUCTINITVEC_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/SandboxIR/Pass.h"
+
+namespace llvm {
+
+class DataLayout;
+
+namespace sandboxir {
+
+class Value;
+class Instruction;
+class Scheduler;
+class Type;
+
+class StructInitVec final : public RegionPass {
+ const DataLayout *DL = nullptr;
+ /// Checks legality of vectorization and \returns the vector type on success,
+ /// nullopt otherwise.
+ std::optional<Type *> canVectorize(ArrayRef<Instruction *> Bndl,
+ Scheduler &Sched);
+
+ void tryEraseDeadInstrs(ArrayRef<Instruction *> Stores,
+ ArrayRef<Instruction *> Loads);
+
+public:
+ StructInitVec() : RegionPass("struct-init-vec") {}
+ bool runOnRegion(Region &Rgn, const Analyses &A) final;
+};
+
+} // namespace sandboxir
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_STRUCTINITVEC_H
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
index d32bfbaf7a4c8..3d57a2f574bb8 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
@@ -67,8 +67,8 @@ class VecUtils {
return *Diff == ElmBytes;
}
- template <typename LoadOrStoreT>
- static bool areConsecutive(ArrayRef<Value *> &Bndl, ScalarEvolution &SE,
+ template <typename LoadOrStoreT, typename ValT>
+ static bool areConsecutive(ArrayRef<ValT *> Bndl, ScalarEvolution &SE,
const DataLayout &DL) {
static_assert(std::is_same<LoadOrStoreT, LoadInst>::value ||
std::is_same<LoadOrStoreT, StoreInst>::value,
@@ -85,6 +85,11 @@ class VecUtils {
}
return true;
}
+ template <typename LoadOrStoreT>
+ static bool areConsecutive(ArrayRef<Value *> Bndl, ScalarEvolution &SE,
+ const DataLayout &DL) {
+ return areConsecutive<LoadOrStoreT, Value>(Bndl, SE, DL);
+ }
/// \Returns the number of vector lanes of \p Ty or 1 if not a vector.
/// NOTE: It asserts that \p Ty is a fixed vector type.
@@ -119,6 +124,35 @@ class VecUtils {
}
return FixedVectorType::get(ElemTy, NumElts);
}
+ /// \Returns the combined vector type for \p Bndl, even when the element types
+ /// differ. For example: i8,i8,i16 will return <4 x i8>. \Returns null if
+ /// types are of mixed float/integer types.
+ static Type *getCombinedVectorTypeFor(ArrayRef<Instruction *> Bndl,
+ const DataLayout &DL) {
+ assert(!Bndl.empty() && "Expected non-empty Bndl!");
+ unsigned TotalBits = 0;
+ unsigned MinElmBits = std::numeric_limits<unsigned>::max();
+ Type *MinElmTy = nullptr;
+ bool LastIsFloat = false;
+ for (auto [Idx, V] : enumerate(Bndl)) {
+ Type *ElmTy = getElementType(Utils::getExpectedType(V));
+
+ // Reject mixed integer/float types.
+ bool IsFloat = ElmTy->isFloatingPointTy();
+ if (Idx != 0 && IsFloat != LastIsFloat)
+ return nullptr;
+ LastIsFloat = IsFloat;
+
+ unsigned ElmBits = Utils::getNumBits(ElmTy, DL);
+ TotalBits += ElmBits * VecUtils::getNumLanes(V);
+ if (ElmBits < MinElmBits) {
+ MinElmBits = ElmBits;
+ MinElmTy = ElmTy;
+ }
+ }
+ unsigned NumElms = TotalBits / MinElmBits;
+ return FixedVectorType::get(MinElmTy, NumElms);
+ }
/// \Returns the instruction in \p Instrs that is lowest in the BB. Expects
/// that all instructions are in the same BB.
static Instruction *getLowest(ArrayRef<Instruction *> Instrs) {
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 96670fe3ea195..c16056aebdf3f 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -13,6 +13,7 @@ add_llvm_component_library(LLVMVectorize
SandboxVectorizer/Passes/RegionsFromBBs.cpp
SandboxVectorizer/Passes/RegionsFromMetadata.cpp
SandboxVectorizer/Passes/SeedCollection.cpp
+ SandboxVectorizer/Passes/StructInitVec.cpp
SandboxVectorizer/Passes/TransactionAcceptOrRevert.cpp
SandboxVectorizer/Passes/TransactionSave.cpp
SandboxVectorizer/SandboxVectorizer.cpp
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def
index 02b973926854d..657778cfd2b62 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def
@@ -26,6 +26,7 @@ REGION_PASS("tr-accept", ::llvm::sandboxir::TransactionAlwaysAccept)
REGION_PASS("tr-revert", ::llvm::sandboxir::TransactionAlwaysRevert)
REGION_PASS("tr-accept-or-revert", ::llvm::sandboxir::TransactionAcceptOrRevert)
REGION_PASS("bottom-up-vec", ::llvm::sandboxir::BottomUpVec)
+REGION_PASS("struct-init-vec", ::llvm::sandboxir::StructInitVec)
#undef REGION_PASS
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
new file mode 100644
index 0000000000000..8c7eb85148d56
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
@@ -0,0 +1,143 @@
+//===- StructInitVec.cpp - Vectorizer pass for struct initializations -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h"
+#include "llvm/SandboxIR/Module.h"
+#include "llvm/SandboxIR/Region.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
+
+namespace llvm {
+
+namespace sandboxir {
+
+std::optional<Type *> StructInitVec::canVectorize(ArrayRef<Instruction *> Bndl,
+                                                  Scheduler &Sched) {
+  // Check if in the same BB.
+  auto *BB = cast<Instruction>(Bndl[0])->getParent();
+  if (any_of(drop_begin(Bndl),
+             [BB](auto *V) { return cast<Instruction>(V)->getParent() != BB; }))
+    return std::nullopt;
+
+  // Check if instructions repeat.
+  SmallPtrSet<Value *, 8> Unique(Bndl.begin(), Bndl.end());
+  if (Unique.size() != Bndl.size())
+    return std::nullopt;
+
+  // Don't mix integer with floating point.
+  bool IsFloat = false;
+  bool IsInteger = false;
+  for (auto *I : Bndl) {
+    if (Utils::getExpectedType(I)->isFloatingPointTy())
+      IsFloat = true;
+    else
+      IsInteger = true;
+  }
+  if (IsFloat && IsInteger)
+    return std::nullopt;
+
+  Type *VecTy = VecUtils::getCombinedVectorTypeFor(Bndl, *DL);
+  if (VecTy == nullptr)
+    return std::nullopt;
+
+  // Check scheduling.
+  if (!Sched.trySchedule(Bndl))
+    return std::nullopt;
+
+  return VecTy;
+}
+
+void StructInitVec::tryEraseDeadInstrs(ArrayRef<Instruction *> Stores,
+ ArrayRef<Instruction *> Loads) {
+ SmallPtrSet<Instruction *, 8> DeadCandidates;
+ for (auto *SI : Stores) {
+ if (auto *PtrI =
+ dyn_cast<Instruction>(cast<StoreInst>(SI)->getPointerOperand()))
+ DeadCandidates.insert(PtrI);
+ SI->eraseFromParent();
+ }
+ for (auto *LI : Loads) {
+ if (auto *PtrI =
+ dyn_cast<Instruction>(cast<LoadInst>(LI)->getPointerOperand()))
+ DeadCandidates.insert(PtrI);
+ cast<LoadInst>(LI)->eraseFromParent();
+ }
+ for (auto *PtrI : DeadCandidates)
+ if (!PtrI->hasNUsesOrMore(1))
+ PtrI->eraseFromParent();
+}
+
+bool StructInitVec::runOnRegion(Region &Rgn, const Analyses &A) {
+ SmallVector<Instruction *, 8> Bndl(Rgn.getAux().begin(), Rgn.getAux().end());
+ assert(Bndl.size() >= 2 && "Bad slice!");
+ Function &F = *Bndl[0]->getParent()->getParent();
+ DL = &F.getParent()->getDataLayout();
+ auto &Ctx = F.getContext();
+ Scheduler Sched(A.getAA(), Ctx);
+ if (!VecUtils::areConsecutive<StoreInst, Instruction>(
+ Bndl, A.getScalarEvolution(), *DL))
+ return false;
+ if (!canVectorize(Bndl, Sched))
+ return false;
+
+ SmallVector<Value *, 4> Operands;
+ Operands.reserve(Bndl.size());
+ for (auto *I : Bndl) {
+ auto *Op = cast<StoreInst>(I)->getValueOperand();
+ Operands.push_back(Op);
+ }
+ BasicBlock *BB = Bndl[0]->getParent();
+ // TODO: For now we only support load operands.
+ // TODO: For now we don't cross BBs.
+ if (!all_of(Operands, [BB](Value *V) {
+ auto *LI = dyn_cast<LoadInst>(V);
+ if (LI == nullptr)
+ return false;
+ if (LI->getParent() != BB)
+ return false;
+ if (LI->hasNUsesOrMore(2))
+ return false;
+ return true;
+ }))
+ return false;
+ // TODO: Try to avoid the extra copy to an instruction vector.
+ SmallVector<Instruction *, 8> Loads;
+ Loads.reserve(Operands.size());
+ for (Value *Op : Operands)
+ Loads.push_back(cast<Instruction>(Op));
+
+ bool Consecutive = VecUtils::areConsecutive<LoadInst, Instruction>(
+ Loads, A.getScalarEvolution(), *DL);
+ if (!Consecutive)
+ return false;
+ if (!canVectorize(Loads, Sched))
+ return false;
+
+ // Generate vector store and vector load
+ Type *Ty = VecUtils::getCombinedVectorTypeFor(Bndl, *DL);
+ Value *LdPtr = cast<LoadInst>(Loads[0])->getPointerOperand();
+ // TODO: Compute alignment.
+ Align LdAlign(1);
+ auto LdWhereIt = std::next(VecUtils::getLowest(Loads)->getIterator());
+ auto *VecLd =
+ LoadInst::create(Ty, LdPtr, LdAlign, LdWhereIt, Ctx, "VecIinitL");
+
+ Value *StPtr = cast<StoreInst>(Bndl[0])->getPointerOperand();
+ // TODO: Compute alignment.
+ Align StAlign(1);
+ auto StWhereIt = std::next(VecUtils::getLowest(Bndl)->getIterator());
+ StoreInst::create(VecLd, StPtr, StAlign, StWhereIt, Ctx);
+
+ tryEraseDeadInstrs(Bndl, Loads);
+ return true;
+}
+
+} // namespace sandboxir
+
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp
index 8432b066f966c..e168ddab9baba 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp
@@ -8,6 +8,7 @@
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromBBs.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/SeedCollection.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAcceptOrRevert.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAlwaysAccept.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAlwaysRevert.h"
diff --git a/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll b/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
new file mode 100644
index 0000000000000..70b02a6993fa6
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
@@ -0,0 +1,289 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<(enable-diff-types)struct-init-vec>" %s -S | FileCheck %s
+
+define void @struct_init_basic(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_basic(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[VECIINITL:%.*]] = load <4 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META0:![0-9]+]]
+; CHECK-NEXT: store <4 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META0]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 1
+ %ptr2 = getelementptr i8, ptr %ptr, i32 2
+ %ld0 = load i8, ptr %ptr0
+ %ld1 = load i8, ptr %ptr1
+ %ld2 = load i16, ptr %ptr2
+ store i8 %ld0, ptr %ptr0
+ store i8 %ld1, ptr %ptr1
+ store i16 %ld2, ptr %ptr2
+ ret void
+}
+
+define void @struct_init_non_pow2(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_non_pow2(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[VECIINITL:%.*]] = load <3 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META1:![0-9]+]]
+; CHECK-NEXT: store <3 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META1]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 1
+ %ld0 = load i8, ptr %ptr0
+ %ld1 = load i16, ptr %ptr1
+ store i8 %ld0, ptr %ptr0
+ store i16 %ld1, ptr %ptr1
+ ret void
+}
+
+define void @struct_init_vectorize_vectors(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_vectorize_vectors(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[VECIINITL:%.*]] = load <4 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META2:![0-9]+]]
+; CHECK-NEXT: store <4 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META2]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 2
+ %ld0 = load <2 x i8>, ptr %ptr0
+ %ld1 = load <2 x i8>, ptr %ptr1
+ store <2 x i8> %ld0, ptr %ptr0
+ store <2 x i8> %ld1, ptr %ptr1
+ ret void
+}
+
+define void @struct_init_vectorize_vectors_diff_types(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_vectorize_vectors_diff_types(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[VECIINITL:%.*]] = load <8 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META3:![0-9]+]]
+; CHECK-NEXT: store <8 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META3]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 2
+ %ptr2 = getelementptr i8, ptr %ptr, i32 4
+ %ld0 = load i16, ptr %ptr0
+ %ld1 = load <2 x i8>, ptr %ptr1
+ %ld2 = load <2 x i16>, ptr %ptr2
+ store i16 %ld0, ptr %ptr0
+ store <2 x i8> %ld1, ptr %ptr1
+ store <2 x i16> %ld2, ptr %ptr2
+ ret void
+}
+
+; Don't vectorize if there is a gap.
+define void @struct_init_gap(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_gap(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 2
+; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR0]], align 1
+; CHECK-NEXT: [[LD1:%.*]] = load i16, ptr [[PTR1]], align 2
+; CHECK-NEXT: store i8 [[LD0]], ptr [[PTR0]], align 1, !sandboxvec [[META4:![0-9]+]]
+; CHECK-NEXT: store i16 [[LD1]], ptr [[PTR1]], align 2, !sandboxvec [[META4]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 2
+ %ld0 = load i8, ptr %ptr0
+ %ld1 = load i16, ptr %ptr1
+ store i8 %ld0, ptr %ptr0
+ store i16 %ld1, ptr %ptr1
+ ret void
+}
+
+define void @struct_init_loads_not_consecutive(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_loads_not_consecutive(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 2
+; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR1]], align 1
+; CHECK-NEXT: [[LD1:%.*]] = load i16, ptr [[PTR0]], align 2
+; CHECK-NEXT: store i8 [[LD0]], ptr [[PTR0]], align 1, !sandboxvec [[META5:![0-9]+]]
+; CHECK-NEXT: store i16 [[LD1]], ptr [[PTR1]], align 2, !sandboxvec [[META5]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 2
+ %ld0 = load i8, ptr %ptr1
+ %ld1 = load i16, ptr %ptr0
+ store i8 %ld0, ptr %ptr0
+ store i16 %ld1, ptr %ptr1
+ ret void
+}
+
+; Vectorize same types, even if bottom-up-vec could do so too.
+define void @struct_init_same_types(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_same_types(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META6:![0-9]+]]
+; CHECK-NEXT: store <2 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META6]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 1
+ %ld0 = load i8, ptr %ptr0
+ %ld1 = load i8, ptr %ptr1
+ store i8 %ld0, ptr %ptr0
+ store i8 %ld1, ptr %ptr1
+ ret void
+}
+
+; Don't vectorize mixed integers/floats.
+define void @struct_init_mixed_int_float(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_mixed_int_float(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i32, ptr [[PTR]], i32 1
+; CHECK-NEXT: [[LD0:%.*]] = load i32, ptr [[PTR0]], align 4
+; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
+; CHECK-NEXT: store i32 [[LD0]], ptr [[PTR0]], align 4, !sandboxvec [[META7:![0-9]+]]
+; CHECK-NEXT: store float [[LD1]], ptr [[PTR1]], align 4, !sandboxvec [[META7]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i32, ptr %ptr, i32 0
+ %ptr1 = getelementptr i32, ptr %ptr, i32 1
+ %ld0 = load i32, ptr %ptr0
+ %ld1 = load float, ptr %ptr1
+ store i32 %ld0, ptr %ptr0
+ store float %ld1, ptr %ptr1
+ ret void
+}
+
+define void @struct_init_mixed_int_float_vectors(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_mixed_int_float_vectors(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i32, ptr [[PTR]], i32 1
+; CHECK-NEXT: [[LD0:%.*]] = load i32, ptr [[PTR0]], align 4
+; CHECK-NEXT: [[LD1:%.*]] = load <2 x float>, ptr [[PTR1]], align 8
+; CHECK-NEXT: store i32 [[LD0]], ptr [[PTR0]], align 4, !sandboxvec [[META8:![0-9]+]]
+; CHECK-NEXT: store <2 x float> [[LD1]], ptr [[PTR1]], align 8, !sandboxvec [[META8]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i32, ptr %ptr, i32 0
+ %ptr1 = getelementptr i32, ptr %ptr, i32 1
+ %ld0 = load i32, ptr %ptr0
+ %ld1 = load <2 x float>, ptr %ptr1
+ store i32 %ld0, ptr %ptr0
+ store <2 x float> %ld1, ptr %ptr1
+ ret void
+}
+
+; Don't cross BBs (for now).
+define ptr @struct_init_dont_cross_bbs(ptr %ptr) {
+; CHECK-LABEL: define ptr @struct_init_dont_cross_bbs(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1
+; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR0]], align 1
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR1]], align 1
+; CHECK-NEXT: br label %[[BB:.*]]
+; CHECK: [[BB]]:
+; CHECK-NEXT: store i8 [[LD0]], ptr [[PTR0]], align 1, !sandboxvec [[META9:![0-9]+]]
+; CHECK-NEXT: store i8 [[LD1]], ptr [[PTR1]], align 1, !sandboxvec [[META9]]
+; CHECK-NEXT: ret ptr [[PTR1]]
+;
+entry:
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 1
+ %ld0 = load i8, ptr %ptr0
+ %ld1 = load i8, ptr %ptr1
+ br label %bb
+
+bb:
+ store i8 %ld0, ptr %ptr0
+ store i8 %ld1, ptr %ptr1
+ ret ptr %ptr1
+}
+
+; Check that all dead GEPs are removed.
+define void @struct_init_cleanup_geps(ptr %ptrA, ptr %ptrB) {
+; CHECK-LABEL: define void @struct_init_cleanup_geps(
+; CHECK-SAME: ptr [[PTRA:%.*]], ptr [[PTRB:%.*]]) {
+; CHECK-NEXT: [[PTRA0:%.*]] = getelementptr i8, ptr [[PTRA]], i32 0
+; CHECK-NEXT: [[PTRB0:%.*]] = getelementptr i8, ptr [[PTRB]], i32 0
+; CHECK-NEXT: [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTRA0]], align 1, !sandboxvec [[META10:![0-9]+]]
+; CHECK-NEXT: store <2 x i8> [[VECIINITL]], ptr [[PTRB0]], align 1, !sandboxvec [[META10]]
+; CHECK-NEXT: ret void
+;
+ %ptrA0 = getelementptr i8, ptr %ptrA, i32 0
+ %ptrA1 = getelementptr i8, ptr %ptrA, i32 1
+ %ptrB0 = getelementptr i8, ptr %ptrB, i32 0
+ %ptrB1 = getelementptr i8, ptr %ptrB, i32 1
+ %ld0 = load i8, ptr %ptrA0
+ %ld1 = load i8, ptr %ptrA1
+ store i8 %ld0, ptr %ptrB0
+ store i8 %ld1, ptr %ptrB1
+ ret void
+}
+
+; Check that we don't try to erase GEPs with other users.
+define ptr @struct_init_cleanup_gep_with_external_use(ptr %ptr) {
+; CHECK-LABEL: define ptr @struct_init_cleanup_gep_with_external_use(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1
+; CHECK-NEXT: [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META11:![0-9]+]]
+; CHECK-NEXT: store <2 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META11]]
+; CHECK-NEXT: ret ptr [[PTR1]]
+;
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 1
+ %ld0 = load i8, ptr %ptr0
+ %ld1 = load i8, ptr %ptr1
+ store i8 %ld0, ptr %ptr0
+ store i8 %ld1, ptr %ptr1
+ ret ptr %ptr1
+}
+
+; Check that we schedule both loads and stores
+define void @struct_init_schedule_stores_and_loads(ptr noalias %ptrA, ptr noalias %ptrB) {
+; CHECK-LABEL: define void @struct_init_schedule_stores_and_loads(
+; CHECK-SAME: ptr noalias [[PTRA:%.*]], ptr noalias [[PTRB:%.*]]) {
+; CHECK-NEXT: [[PTRA0:%.*]] = getelementptr i8, ptr [[PTRA]], i64 0
+; CHECK-NEXT: [[PTRB0:%.*]] = getelementptr i8, ptr [[PTRB]], i64 0
+; CHECK-NEXT: [[PTRB1:%.*]] = getelementptr i8, ptr [[PTRB]], i64 1
+; CHECK-NEXT: [[OTHER:%.*]] = load i8, ptr [[PTRB1]], align 1
+; CHECK-NEXT: [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTRA0]], align 1, !sandboxvec [[META12:![0-9]+]]
+; CHECK-NEXT: store i8 0, ptr [[PTRA0]], align 1
+; CHECK-NEXT: store <2 x i8> [[VECIINITL]], ptr [[PTRB0]], align 1, !sandboxvec [[META12]]
+; CHECK-NEXT: ret void
+;
+ %ptrA0 = getelementptr i8, ptr %ptrA, i64 0
+ %ptrA1 = getelementptr i8, ptr %ptrA, i64 1
+ %ptrB0 = getelementptr i8, ptr %ptrB, i64 0
+ %ptrB1 = getelementptr i8, ptr %ptrB, i64 1
+
+ %ld0 = load i8, ptr %ptrA0
+ store i8 %ld0, ptr %ptrB0
+
+ store i8 0, ptr %ptrA0
+ %other = load i8, ptr %ptrB1
+
+ %ld1 = load i8, ptr %ptrA1
+ store i8 %ld1, ptr %ptrB1
+ ret void
+}
+
+;.
+; CHECK: [[META0]] = distinct !{!"sandboxregion"}
+; CHECK: [[META1]] = distinct !{!"sandboxregion"}
+; CHECK: [[META2]] = distinct !{!"sandboxregion"}
+; CHECK: [[META3]] = distinct !{!"sandboxregion"}
+; CHECK: [[META4]] = distinct !{!"sandboxregion"}
+; CHECK: [[META5]] = distinct !{!"sandboxregion"}
+; CHECK: [[META6]] = distinct !{!"sandboxregion"}
+; CHECK: [[META7]] = distinct !{!"sandboxregion"}
+; CHECK: [[META8]] = distinct !{!"sandboxregion"}
+; CHECK: [[META9]] = distinct !{!"sandboxregion"}
+; CHECK: [[META10]] = distinct !{!"sandboxregion"}
+; CHECK: [[META11]] = distinct !{!"sandboxregion"}
+; CHECK: [[META12]] = distinct !{!"sandboxregion"}
+;.
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
index 2bfea6908305c..03eb32fdf6002 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/SandboxIR/Context.h"
#include "llvm/SandboxIR/Function.h"
+#include "llvm/SandboxIR/Module.h"
#include "llvm/SandboxIR/Type.h"
#include "llvm/Support/SourceMgr.h"
#include "gmock/gmock.h"
@@ -424,6 +425,80 @@ TEST_F(VecUtilsTest, GetWideType) {
EXPECT_EQ(sandboxir::VecUtils::getWideType(Int32X4Ty, 2), Int32X8Ty);
}
+TEST_F(VecUtilsTest, GetCombinedVectorTypeFor) {
+ parseIR(R"IR(
+define void @foo(ptr %ptr, i8 %i8, i16 %i16, i32 %i32, float %f32, double %f64, <2 x i8> %v2xi8, <2 x i16> %v2xi16) {
+ store i8 %i8, ptr %ptr
+ store i16 %i16, ptr %ptr
+ store i32 %i32, ptr %ptr
+ store float %f32, ptr %ptr
+ store double %f64, ptr %ptr
+ store <2 x i8> %v2xi8, ptr %ptr
+ store <2 x i16> %v2xi16, ptr %ptr
+ ret void
+}
+)IR");
+ Function &LLVMF = *M->getFunction("foo");
+
+ sandboxir::Context Ctx(C);
+ auto &F = *Ctx.createFunction(&LLVMF);
+ auto &BB = *F.begin();
+ const auto &DL = F.getParent()->getDataLayout();
+ auto It = BB.begin();
+ auto *Store_i8 = &*It++;
+ auto *Store_i16 = &*It++;
+ auto *Store_i32 = &*It++;
+ auto *Store_f32 = &*It++;
+ auto *Store_f64 = &*It++;
+ auto *Store_2xi8 = &*It++;
+ auto *Store_2xi16 = &*It++;
+
+ auto *I8Ty = sandboxir::IntegerType::get(Ctx, 8);
+ auto *I16Ty = sandboxir::IntegerType::get(Ctx, 16);
+ auto *F32Ty = sandboxir::Type::getFloatTy(Ctx);
+
+ // Check same type.
+ EXPECT_EQ(
+ sandboxir::VecUtils::getCombinedVectorTypeFor({Store_i8, Store_i8}, DL),
+ sandboxir::FixedVectorType::get(I8Ty, 2));
+ EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+ {Store_2xi8, Store_2xi8}, DL),
+ sandboxir::FixedVectorType::get(I8Ty, 4));
+
+ // Check different types, power-of-two.
+ EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+ {Store_i8, Store_i8, Store_i16}, DL),
+ sandboxir::FixedVectorType::get(I8Ty, 4));
+ EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+ {Store_i8, Store_i8, Store_i16, Store_i32}, DL),
+ sandboxir::FixedVectorType::get(I8Ty, 8));
+ EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+ {Store_2xi8, Store_2xi8, Store_2xi16}, DL),
+ sandboxir::FixedVectorType::get(I8Ty, 8));
+
+ // Check different types non-power-of-two.
+ EXPECT_EQ(
+ sandboxir::VecUtils::getCombinedVectorTypeFor({Store_f32, Store_f64}, DL),
+ sandboxir::FixedVectorType::get(F32Ty, 3));
+ EXPECT_EQ(
+ sandboxir::VecUtils::getCombinedVectorTypeFor({Store_i32, Store_i16}, DL),
+ sandboxir::FixedVectorType::get(I16Ty, 3));
+ EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+ {Store_i8, Store_i16, Store_i32}, DL),
+ sandboxir::FixedVectorType::get(I8Ty, 7));
+ EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+ {Store_i8, Store_i16, Store_2xi8}, DL),
+ sandboxir::FixedVectorType::get(I8Ty, 5));
+
+ // Mix float and integer.
+ EXPECT_EQ(
+ sandboxir::VecUtils::getCombinedVectorTypeFor({Store_i32, Store_f32}, DL),
+ nullptr);
+ EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+ {Store_f32, Store_2xi8}, DL),
+ nullptr);
+}
+
TEST_F(VecUtilsTest, GetLowest) {
parseIR(R"IR(
define void @foo(i8 %v) {
>From 196aec327cc56f4d9f2e00d89a264453cffddf4e Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vporpodas at google.com>
Date: Fri, 14 Mar 2025 12:52:08 -0700
Subject: [PATCH 2/2] [SandboxVec][StructInitVec] Add support for constants
---
.../Passes/StructInitVec.cpp | 94 ++++++++++++-------
.../SandboxVectorizer/struct_init_vec.ll | 54 +++++++++++
2 files changed, 115 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
index 8c7eb85148d56..a895619995294 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
@@ -93,46 +93,74 @@ bool StructInitVec::runOnRegion(Region &Rgn, const Analyses &A) {
Operands.push_back(Op);
}
BasicBlock *BB = Bndl[0]->getParent();
- // TODO: For now we only support load operands.
- // TODO: For now we don't cross BBs.
- if (!all_of(Operands, [BB](Value *V) {
- auto *LI = dyn_cast<LoadInst>(V);
- if (LI == nullptr)
- return false;
- if (LI->getParent() != BB)
- return false;
- if (LI->hasNUsesOrMore(2))
- return false;
- return true;
- }))
- return false;
- // TODO: Try to avoid the extra copy to an instruction vector.
- SmallVector<Instruction *, 8> Loads;
- Loads.reserve(Operands.size());
- for (Value *Op : Operands)
- Loads.push_back(cast<Instruction>(Op));
-
- bool Consecutive = VecUtils::areConsecutive<LoadInst, Instruction>(
- Loads, A.getScalarEvolution(), *DL);
- if (!Consecutive)
- return false;
- if (!canVectorize(Loads, Sched))
+ bool AllLoads = all_of(Operands, [BB](Value *V) {
+ auto *LI = dyn_cast<LoadInst>(V);
+ if (LI == nullptr)
+ return false;
+ // TODO: For now we don't cross BBs.
+ if (LI->getParent() != BB)
+ return false;
+ if (LI->hasNUsesOrMore(2))
+ return false;
+ return true;
+ });
+ bool AllConstants =
+ all_of(Operands, [](Value *V) { return isa<Constant>(V); });
+ if (!AllLoads && !AllConstants)
return false;
- // Generate vector store and vector load
- Type *Ty = VecUtils::getCombinedVectorTypeFor(Bndl, *DL);
- Value *LdPtr = cast<LoadInst>(Loads[0])->getPointerOperand();
- // TODO: Compute alignment.
- Align LdAlign(1);
- auto LdWhereIt = std::next(VecUtils::getLowest(Loads)->getIterator());
- auto *VecLd =
- LoadInst::create(Ty, LdPtr, LdAlign, LdWhereIt, Ctx, "VecIinitL");
+ Value *VecOp = nullptr;
+ SmallVector<Instruction *, 8> Loads;
+ if (AllLoads) {
+ // TODO: Try to avoid the extra copy to an instruction vector.
+ Loads.reserve(Operands.size());
+ for (Value *Op : Operands)
+ Loads.push_back(cast<Instruction>(Op));
+
+ bool Consecutive = VecUtils::areConsecutive<LoadInst, Instruction>(
+ Loads, A.getScalarEvolution(), *DL);
+ if (!Consecutive)
+ return false;
+ if (!canVectorize(Loads, Sched))
+ return false;
+
+ // Generate vector load.
+ Type *Ty = VecUtils::getCombinedVectorTypeFor(Bndl, *DL);
+ Value *LdPtr = cast<LoadInst>(Loads[0])->getPointerOperand();
+ // TODO: Compute alignment.
+ Align LdAlign(1);
+ auto LdWhereIt = std::next(VecUtils::getLowest(Loads)->getIterator());
+ VecOp = LoadInst::create(Ty, LdPtr, LdAlign, LdWhereIt, Ctx, "VecIinitL");
+ } else if (AllConstants) {
+ SmallVector<Constant *, 8> Constants;
+ Constants.reserve(Operands.size());
+ for (Value *Op : Operands) {
+ auto *COp = cast<Constant>(Op);
+ if (auto *AggrCOp = dyn_cast<ConstantAggregate>(COp)) {
+ // If the operand is a constant aggregate, then append all its elements.
+ for (Value *Elm : AggrCOp->operands())
+ Constants.push_back(cast<Constant>(Elm));
+ } else if (auto *SeqCOp = dyn_cast<ConstantDataSequential>(COp)) {
+ for (auto ElmIdx : seq<unsigned>(SeqCOp->getNumElements()))
+ Constants.push_back(SeqCOp->getElementAsConstant(ElmIdx));
+ } else if (auto *Zero = dyn_cast<ConstantAggregateZero>(COp)) {
+ auto *ZeroElm = Zero->getSequentialElement();
+ for (auto ElmIdx :
+ seq<unsigned>(Zero->getElementCount().getFixedValue()))
+ Constants.push_back(ZeroElm);
+ } else {
+ Constants.push_back(COp);
+ }
+ }
+ VecOp = ConstantVector::get(Constants);
+ }
+ // Generate vector store.
Value *StPtr = cast<StoreInst>(Bndl[0])->getPointerOperand();
// TODO: Compute alignment.
Align StAlign(1);
auto StWhereIt = std::next(VecUtils::getLowest(Bndl)->getIterator());
- StoreInst::create(VecLd, StPtr, StAlign, StWhereIt, Ctx);
+ StoreInst::create(VecOp, StPtr, StAlign, StWhereIt, Ctx);
tryEraseDeadInstrs(Bndl, Loads);
return true;
diff --git a/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll b/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
index 70b02a6993fa6..6f8dd54404e98 100644
--- a/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
@@ -272,6 +272,57 @@ define void @struct_init_schedule_stores_and_loads(ptr noalias %ptrA, ptr noalia
ret void
}
+; Store-constant pattern.
+define void @struct_init_constants(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_constants(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: store <3 x i8> <i8 42, i8 43, i8 44>, ptr [[PTR0]], align 1, !sandboxvec [[META13:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 1
+ %ptr2 = getelementptr i8, ptr %ptr, i32 3
+ store i8 42, ptr %ptr0
+ store i16 43, ptr %ptr1
+ store i8 44, ptr %ptr2
+ ret void
+}
+
+; Same but with ConstantDataSequential.
+define void @struct_init_constants_CDS(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_constants_CDS(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT: store <4 x i8> <i8 0, i8 1, i8 2, i8 3>, ptr [[PTR0]], align 1, !sandboxvec [[META14:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr i8, ptr %ptr, i32 0
+ %ptr1 = getelementptr i8, ptr %ptr, i32 1
+ %ptr2 = getelementptr i8, ptr %ptr, i32 3
+ store i8 0, ptr %ptr0
+ store <2 x i8> <i8 1, i8 2>, ptr %ptr1
+ store i8 3, ptr %ptr2
+ ret void
+}
+
+; Same but with floats
+define void @struct_init_constants_CDS_float(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_constants_CDS_float(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; CHECK-NEXT: store <8 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, ptr [[PTR0]], align 1, !sandboxvec [[META15:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr float, ptr %ptr, i32 0
+ %ptr1 = getelementptr float, ptr %ptr, i32 1
+ %ptr2 = getelementptr float, ptr %ptr, i32 3
+ store float 1.0, ptr %ptr0
+ store <2 x float> zeroinitializer, ptr %ptr1
+ store <5 x float> zeroinitializer, ptr %ptr2
+ ret void
+}
+
;.
; CHECK: [[META0]] = distinct !{!"sandboxregion"}
; CHECK: [[META1]] = distinct !{!"sandboxregion"}
@@ -286,4 +337,7 @@ define void @struct_init_schedule_stores_and_loads(ptr noalias %ptrA, ptr noalia
; CHECK: [[META10]] = distinct !{!"sandboxregion"}
; CHECK: [[META11]] = distinct !{!"sandboxregion"}
; CHECK: [[META12]] = distinct !{!"sandboxregion"}
+; CHECK: [[META13]] = distinct !{!"sandboxregion"}
+; CHECK: [[META14]] = distinct !{!"sandboxregion"}
+; CHECK: [[META15]] = distinct !{!"sandboxregion"}
;.
More information about the llvm-commits
mailing list